{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "386bda37-01a1-4a68-b965-c9ba4b5b2dd1",
   "metadata": {},
   "source": [
    "### Load all disgenet GDA data\n",
    "> all_gene_disease_associations.tsv can be obtained from https://www.disgenet.org/downloads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "28ac2f3f-c2c8-490f-b652-bdb4dd1923c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "# URL of the file to download\n",
    "url = \"http://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz\"\n",
    "\n",
    "# Send a GET request to the URL\n",
    "response = requests.get(url)\n",
    "\n",
    "# Make sure the request was successful\n",
    "assert response.status_code == 200\n",
    "\n",
    "# Write the content of the response to a file\n",
    "with open(\"all_gene_disease_associations.tsv.gz\", \"wb\") as file:\n",
    "    file.write(response.content)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "2e3670df-ba94-4f0c-8426-3d95d638d1e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of unique 'diseaseDes': 30170\n",
      "Number of unique 'proteinSeq': 21671\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1134942"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Using '\\t' as the delimiter because the file appears to be a TSV (Tab-separated values) file.\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/all_gene_disease_associations.tsv', delimiter='\\t', error_bad_lines=False)\n",
    "\n",
    "# Rest of your code\n",
    "num_unique_diseaseId = df['diseaseId'].nunique()\n",
    "num_unique_geneId = df['geneId'].nunique()\n",
    "print(f\"Number of unique 'diseaseDes': {num_unique_diseaseId}\")\n",
    "print(f\"Number of unique 'proteinSeq': {num_unique_geneId}\")\n",
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "e9bac009-b622-40c8-919c-824dbe3808fd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>proteinSeq</th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C0019196</td>\n",
       "      <td>ACP2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C0022578</td>\n",
       "      <td>ACP2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C0026709</td>\n",
       "      <td>ACP2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>C0026827</td>\n",
       "      <td>ACP2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>C0042963</td>\n",
       "      <td>ACP2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  diseaseId geneSymbol proteinSeq diseaseDes  score\n",
       "0  C0019196       ACP2        NaN        NaN    NaN\n",
       "1  C0022578       ACP2        NaN        NaN    NaN\n",
       "2  C0026709       ACP2        NaN        NaN    NaN\n",
       "3  C0026827       ACP2        NaN        NaN    NaN\n",
       "4  C0042963       ACP2        NaN        NaN    NaN"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_updated.csv')\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "d6126d9b-8522-4f9b-8a89-e71313409e47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of unique 'diseaseDes': 28873\n",
      "Number of unique 'proteinSeq': 16622\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1041587"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_latest.csv')\n",
    "num_unique_diseaseId = df['diseaseDes'].nunique()\n",
    "num_unique_geneId = df['proteinSeq'].nunique()\n",
    "print(f\"Number of unique 'diseaseDes': {num_unique_diseaseId}\")\n",
    "print(f\"Number of unique 'proteinSeq': {num_unique_geneId}\")\n",
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "f6b6404a-3696-4e1e-a974-64aba8c027a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>proteinSeq</th>\n",
       "      <th>Y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>A2M</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>NAT1</td>\n",
       "      <td>MLLLLLHKEAALEPQGPAVGYNSLPVSDDHHVSGIQARKKQQSVFW...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>NAT2</td>\n",
       "      <td>MDIEAYFERIGYKNSRNKLDLETLTDILEHQIRAVPFENLNMHCGQ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>SERPINA3</td>\n",
       "      <td>MERMLPLLALGLLAAGFCPAVLCHPNSPLDEENLTQENQDRGTHVD...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  diseaseId                                         diseaseDes geneSymbol  \\\n",
       "0  C0002395  Alzheimer's disease is a degenerative disease ...       A1BG   \n",
       "1  C0002395  Alzheimer's disease is a degenerative disease ...        A2M   \n",
       "2  C0002395  Alzheimer's disease is a degenerative disease ...       NAT1   \n",
       "3  C0002395  Alzheimer's disease is a degenerative disease ...       NAT2   \n",
       "4  C0002395  Alzheimer's disease is a degenerative disease ...   SERPINA3   \n",
       "\n",
       "                                          proteinSeq  Y  \n",
       "0  MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  0  \n",
       "1  MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...  1  \n",
       "2  MLLLLLHKEAALEPQGPAVGYNSLPVSDDHHVSGIQARKKQQSVFW...  1  \n",
       "3  MDIEAYFERIGYKNSRNKLDLETLTDILEHQIRAVPFENLNMHCGQ...  1  \n",
       "4  MERMLPLLALGLLAAGFCPAVLCHPNSPLDEENLTQENQDRGTHVD...  1  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 读取CSV文件\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_latest.csv')\n",
    "\n",
    "# 移除缺失proteinSeq的行\n",
    "df = df[df['proteinSeq'].notna()]\n",
    "\n",
    "# 获取与diseaseId C0002395相关的diseaseDes\n",
    "desired_diseaseId = 'C0002395'\n",
    "desired_diseaseDes = df[df['diseaseId'] == desired_diseaseId]['diseaseDes'].iloc[0]\n",
    "\n",
    "# 获取与diseaseDes相关的proteinSeq\n",
    "related_proteins = df[df['diseaseDes'] == desired_diseaseDes]['proteinSeq'].unique()\n",
    "\n",
    "# 为每个proteinSeq和geneSymbol组合确定Y值\n",
    "df['Y'] = df['proteinSeq'].isin(related_proteins).astype(int)\n",
    "\n",
    "# 创建一个新的DataFrame来储存结果\n",
    "new_df = pd.DataFrame({\n",
    "    'diseaseId': desired_diseaseId,\n",
    "    'diseaseDes': desired_diseaseDes,\n",
    "    'geneSymbol': df['geneSymbol'],\n",
    "    'proteinSeq': df['proteinSeq'],\n",
    "    'Y': df['Y']\n",
    "})\n",
    "\n",
    "# 去除重复项并重置索引\n",
    "new_df = new_df.drop_duplicates().reset_index(drop=True)\n",
    "\n",
    "new_df.head()\n",
    "new_df.to_csv('Alzheimer_disease.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "838577c5-3485-426e-a101-082331c2d1c1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Count for Y=0: 3083\n",
      "Count for Y=1: 3083\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df=pd.read_csv('/nfs/dpa_pretrain/data/downstream/alzheimer_new.csv')\n",
    "y_counts = df['Y'].value_counts()\n",
    "count_Y_0 = y_counts.get(0, 0)\n",
    "count_Y_1 = y_counts.get(1, 0)\n",
    "\n",
    "print(f\"Count for Y=0: {count_Y_0}\")\n",
    "print(f\"Count for Y=1: {count_Y_1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "5ad4e8bd-3407-4798-940a-dd76b3b61b28",
   "metadata": {},
   "outputs": [],
   "source": [
    "new_df.to_csv('Alzheimer_disease.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "945eefab-ba3a-4610-80d6-343c892bd57b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16733"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(new_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9f126180-2971-43c7-aec5-93ee67f50d64",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of entries in disgenet_latest.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: 3076\n",
      "Number of entries in disgenet_gda.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: 2954\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from itertools import product\n",
    "\n",
    "# Load data from CSV files\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_latest.csv')\n",
    "df_g = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_gda.csv')\n",
    "\n",
    "# Filter rows with diseaseId = C0002395 and non-empty diseaseDes and proteinSeq\n",
    "filtered_df = df[(df['diseaseId'] == 'C0699791') & df['diseaseDes'].notna() & df['proteinSeq'].notna()]\n",
    "filtered_df_g = df_g[(df_g['diseaseId'] == 'C0699791') & df_g['diseaseDes'].notna() & df_g['proteinSeq'].notna()]\n",
    "\n",
    "# Print the number of rows\n",
    "print(f\"Number of entries in disgenet_latest.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: {len(filtered_df)}\")\n",
    "print(f\"Number of entries in disgenet_gda.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: {len(filtered_df_g)}\")\n",
    "\n",
    "# Extract unique disease descriptions and protein sequences\n",
    "disease_des_stomach = filtered_df['diseaseDes'].unique()\n",
    "unique_proteinSeq_from_latest = df[df['proteinSeq'].notna()]['proteinSeq'].unique()\n",
    "unique_proteinSeq_from_gda = filtered_df_g['proteinSeq'].unique()\n",
    "\n",
    "# Filter out proteinSeq that appear in disgenet_gda.csv\n",
    "unique_proteinSeq = [seq for seq in unique_proteinSeq_from_latest if seq not in unique_proteinSeq_from_gda]\n",
    "\n",
    "# # Create all possible diseaseDes - proteinSeq pairs, keeping geneSymbol and geneId\n",
    "# pairs = []\n",
    "# for des in disease_des_stomach:\n",
    "#     for seq in unique_proteinSeq:\n",
    "#         row = filtered_df[filtered_df['proteinSeq'] == seq].iloc[0] if seq in filtered_df['proteinSeq'].values else None\n",
    "#         geneSymbol = row['geneSymbol'] if row is not None else 'NA'\n",
    "#         geneId = row['geneId'] if row is not None else 'NA'\n",
    "#         pairs.append([des, seq, 0, geneSymbol, geneId])  # Y initialized to 0\n",
    "\n",
    "# Create a DataFrame with the pairs\n",
    "result_df = pd.DataFrame(pairs, columns=['diseaseDes', 'proteinSeq', 'Y', 'geneSymbol', 'geneId'])\n",
    "result_df['Y'] = 0\n",
    "# result_df = pd.DataFrame(pairs, columns=['diseaseDes', 'proteinSeq', 'geneSymbol', 'geneId'])\n",
    "# Save the DataFrame to a CSV file\n",
    "result_df.to_csv('updated_stomach.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "a0e57000-830b-4dee-81f9-3b09596f3e2b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of rows where Y = 1: 134\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load data from CSV files\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_latest.csv')\n",
    "df_g = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_gda.csv')\n",
    "\n",
    "# Filter rows with diseaseId = C0699791 and non-empty diseaseDes and proteinSeq\n",
    "filtered_df = df[(df['diseaseId'] == 'C0699791') & df['diseaseDes'].notna() & df['proteinSeq'].notna()]\n",
    "filtered_df_g = df_g[(df_g['diseaseId'] == 'C0699791') & df_g['diseaseDes'].notna() & df_g['proteinSeq'].notna()]\n",
    "\n",
    "# Extract all unique gene symbols from disgenet_gda.csv with diseaseId = C0699791\n",
    "gene_symbols_to_exclude = filtered_df_g['geneSymbol'].unique()\n",
    "\n",
    "# Extract all unique protein sequences and corresponding gene symbols from disgenet_latest.csv\n",
    "unique_proteins_and_symbols = df[df['proteinSeq'].notna()][['proteinSeq', 'geneSymbol']].drop_duplicates()\n",
    "\n",
    "# Exclude rows where geneSymbol appears in gene_symbols_to_exclude\n",
    "unique_proteins_and_symbols_filtered = unique_proteins_and_symbols[~unique_proteins_and_symbols['geneSymbol'].isin(gene_symbols_to_exclude)]\n",
    "\n",
    "# Create all possible diseaseDes - proteinSeq pairs, keeping geneSymbol and geneId\n",
    "pairs = []\n",
    "for des in filtered_df['diseaseDes'].unique():\n",
    "    for _, row in unique_proteins_and_symbols_filtered.iterrows():\n",
    "        seq, geneSymbol = row['proteinSeq'], row['geneSymbol']\n",
    "        geneId = filtered_df.loc[filtered_df['geneSymbol'] == geneSymbol, 'geneId'].iloc[0] if geneSymbol in filtered_df['geneSymbol'].values else 'NA'\n",
    "        Y_value = 1 if seq in filtered_df['proteinSeq'].values else 0\n",
    "        pairs.append([des, seq, Y_value, geneSymbol, geneId])\n",
    "\n",
    "# Create a DataFrame with the pairs\n",
    "result_df = pd.DataFrame(pairs, columns=['diseaseDes', 'proteinSeq', 'Y', 'geneSymbol', 'geneId'])\n",
    "\n",
    "# Count the number of rows where Y = 1\n",
    "count_Y_equals_1 = result_df[result_df['Y'] == 1].shape[0]\n",
    "\n",
    "print(f\"Number of rows where Y = 1: {count_Y_equals_1}\")\n",
    "\n",
    "# Save the DataFrame to a CSV file\n",
    "result_df.to_csv('stomach.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "ec460aed-a959-4595-bf5b-2d29695925a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13788"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_g = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/stomach.csv')\n",
    "# df_g.head(5)\n",
    "len(df_g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "8d5b8027-8241-4bb1-884f-07d7d4673519",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>proteinSeq</th>\n",
       "      <th>Y</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>geneId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A malignant epithelial tumor of the stomach mu...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>0</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A malignant epithelial tumor of the stomach mu...</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>0</td>\n",
       "      <td>A2M</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A malignant epithelial tumor of the stomach mu...</td>\n",
       "      <td>MERMLPLLALGLLAAGFCPAVLCHPNSPLDEENLTQENQDRGTHVD...</td>\n",
       "      <td>0</td>\n",
       "      <td>SERPINA3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A malignant epithelial tumor of the stomach mu...</td>\n",
       "      <td>MGRKSLYLLIVGILIAYYIYTPLPDNVEEPWRMMWINAHLKTIQNL...</td>\n",
       "      <td>0</td>\n",
       "      <td>AADAC</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A malignant epithelial tumor of the stomach mu...</td>\n",
       "      <td>MESESESGAAADTPPLETLSFHGDEEIIEVVELDPGPPDPADDLAQ...</td>\n",
       "      <td>0</td>\n",
       "      <td>AAMP</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          diseaseDes  \\\n",
       "0  A malignant epithelial tumor of the stomach mu...   \n",
       "1  A malignant epithelial tumor of the stomach mu...   \n",
       "2  A malignant epithelial tumor of the stomach mu...   \n",
       "3  A malignant epithelial tumor of the stomach mu...   \n",
       "4  A malignant epithelial tumor of the stomach mu...   \n",
       "\n",
       "                                          proteinSeq  Y geneSymbol  geneId  \n",
       "0  MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  0       A1BG     NaN  \n",
       "1  MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...  0        A2M     NaN  \n",
       "2  MERMLPLLALGLLAAGFCPAVLCHPNSPLDEENLTQENQDRGTHVD...  0   SERPINA3     NaN  \n",
       "3  MGRKSLYLLIVGILIAYYIYTPLPDNVEEPWRMMWINAHLKTIQNL...  0      AADAC     NaN  \n",
       "4  MESESESGAAADTPPLETLSFHGDEEIIEVVELDPGPPDPADDLAQ...  0       AAMP     NaN  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_g.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "c44d2061-08d3-4e37-9f41-32c8b24a181f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of rows where Y = 1: 183\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load data from CSV files\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_latest.csv')\n",
    "df_g = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_gda.csv')\n",
    "\n",
    "# Filter rows with diseaseId = C0002395 and non-empty diseaseDes and proteinSeq\n",
    "filtered_df = df[(df['diseaseId'] == 'C0002395') & df['diseaseDes'].notna() & df['proteinSeq'].notna()]\n",
    "filtered_df_g = df_g[(df_g['diseaseId'] == 'C0002395') & df_g['diseaseDes'].notna() & df_g['proteinSeq'].notna()]\n",
    "\n",
    "# Extract all unique gene symbols from disgenet_gda.csv with diseaseId = C0699791\n",
    "gene_symbols_to_exclude = filtered_df_g['geneSymbol'].unique()\n",
    "\n",
    "# Extract all unique protein sequences and corresponding gene symbols from disgenet_latest.csv\n",
    "unique_proteins_and_symbols = df[df['proteinSeq'].notna()][['proteinSeq', 'geneSymbol']].drop_duplicates()\n",
    "\n",
    "# Exclude rows where geneSymbol appears in gene_symbols_to_exclude\n",
    "unique_proteins_and_symbols_filtered = unique_proteins_and_symbols[~unique_proteins_and_symbols['geneSymbol'].isin(gene_symbols_to_exclude)]\n",
    "\n",
    "# Create all possible diseaseDes - proteinSeq pairs, keeping geneSymbol and geneId\n",
    "pairs = []\n",
    "for des in filtered_df['diseaseDes'].unique():\n",
    "    for _, row in unique_proteins_and_symbols_filtered.iterrows():\n",
    "        seq, geneSymbol = row['proteinSeq'], row['geneSymbol']\n",
    "        geneId = filtered_df.loc[filtered_df['geneSymbol'] == geneSymbol, 'geneId'].iloc[0] if geneSymbol in filtered_df['geneSymbol'].values else 'NA'\n",
    "        Y_value = 1 if seq in filtered_df['proteinSeq'].values else 0\n",
    "        pairs.append([des, seq, Y_value, geneSymbol, geneId])\n",
    "\n",
    "# Create a DataFrame with the pairs\n",
    "result_df = pd.DataFrame(pairs, columns=['diseaseDes', 'proteinSeq', 'Y', 'geneSymbol', 'geneId'])\n",
    "\n",
    "# Count the number of rows where Y = 1\n",
    "count_Y_equals_1 = result_df[result_df['Y'] == 1].shape[0]\n",
    "\n",
    "print(f\"Number of rows where Y = 1: {count_Y_equals_1}\")\n",
    "\n",
    "# Save the DataFrame to a CSV file\n",
    "result_df.to_csv('alzheimer.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "3b5b9c9e-89e5-45eb-a2ca-93a1947924d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13833"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_g = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/alzheimer.csv')\n",
    "len(df_g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "7e3b7135-8021-459c-8514-10b98166c3cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>proteinSeq</th>\n",
       "      <th>Y</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>geneId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>0</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>1</td>\n",
       "      <td>A2M</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>MERMLPLLALGLLAAGFCPAVLCHPNSPLDEENLTQENQDRGTHVD...</td>\n",
       "      <td>1</td>\n",
       "      <td>SERPINA3</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>MGRKSLYLLIVGILIAYYIYTPLPDNVEEPWRMMWINAHLKTIQNL...</td>\n",
       "      <td>0</td>\n",
       "      <td>AADAC</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Alzheimer's disease is a degenerative disease ...</td>\n",
       "      <td>MESESESGAAADTPPLETLSFHGDEEIIEVVELDPGPPDPADDLAQ...</td>\n",
       "      <td>0</td>\n",
       "      <td>AAMP</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          diseaseDes  \\\n",
       "0  Alzheimer's disease is a degenerative disease ...   \n",
       "1  Alzheimer's disease is a degenerative disease ...   \n",
       "2  Alzheimer's disease is a degenerative disease ...   \n",
       "3  Alzheimer's disease is a degenerative disease ...   \n",
       "4  Alzheimer's disease is a degenerative disease ...   \n",
       "\n",
       "                                          proteinSeq  Y geneSymbol  geneId  \n",
       "0  MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  0       A1BG     NaN  \n",
       "1  MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...  1        A2M     2.0  \n",
       "2  MERMLPLLALGLLAAGFCPAVLCHPNSPLDEENLTQENQDRGTHVD...  1   SERPINA3    12.0  \n",
       "3  MGRKSLYLLIVGILIAYYIYTPLPDNVEEPWRMMWINAHLKTIQNL...  0      AADAC     NaN  \n",
       "4  MESESESGAAADTPPLETLSFHGDEEIIEVVELDPGPPDPADDLAQ...  0       AAMP     NaN  "
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_g.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "b3afe9e9-fe11-4f87-a927-6e25a980afa9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of entries in disgenet_latest.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: 3076\n",
      "Number of entries in disgenet_gda.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: 2954\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 从两个CSV文件中读取数据\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_latest.csv')\n",
    "df_g = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_gda.csv')\n",
    "\n",
    "# 找到 diseaseId = C0699791 且 diseaseDes 和 proteinSeq 不为空的行\n",
    "filtered_df = df[(df['diseaseId'] == 'C0699791') & df['proteinSeq'].notna()]\n",
    "filtered_df_g = df_g[(df_g['diseaseId'] == 'C0699791')& df_g['proteinSeq'].notna()]\n",
    "\n",
    "# 打印出数量\n",
    "print(f\"Number of entries in disgenet_latest.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: {len(filtered_df)}\")\n",
    "print(f\"Number of entries in disgenet_gda.csv for diseaseId = C0699791 with non-empty diseaseDes and proteinSeq: {len(filtered_df_g)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e86bc43c-fedb-4250-83a4-ec9a9738eda4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Read the CSV files\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_latest.csv')\n",
    "df_g = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/disgenet_gda.csv')\n",
    "\n",
    "# Merge the two DataFrames using an outer join based on the \"geneId\" and \"diseaseId\" columns\n",
    "merged_df = pd.merge(df, df_g, on=['geneSymbol', 'diseaseId'], how='outer', indicator=True)\n",
    "\n",
    "# Filter the merged DataFrame to keep only the rows that are present in one DataFrame but not in the other\n",
    "differences_df = merged_df[merged_df['_merge'] != 'both']\n",
    "\n",
    "# Save the differences to a CSV file\n",
    "differences_df.to_csv('/nfs/dpa_pretrain/notebooks/data_processing/differences.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9091364-7d17-431a-92e9-1c0064b1c357",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('/nfs/dpa_pretrain/notebooks/data_processing/differences.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc0b9382-2338-4d99-b085-ed64edb723be",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "2d50c173-9f27-44de-af8a-bef2b0a8e380",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "83688"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/differences.csv')\n",
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b93814e-6e5f-4f97-beb2-07af77b09dd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Read the two input files.\n",
    "df_diff = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/differences.csv')\n",
    "df_disgenet = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_latest.csv')\n",
    "\n",
    "# Match rows of the two tables on the diseaseId column (inner join).\n",
    "# NOTE(review): if diseaseId repeats in both tables this is a many-to-many join and the\n",
    "# row count multiplies; non-key columns present in both inputs receive _x/_y suffixes,\n",
    "# so the plain names selected below may be missing -- confirm against the real schemas.\n",
    "merged = df_diff.merge(df_disgenet, how='inner', on='diseaseId')\n",
    "\n",
    "# Select the required columns.\n",
    "result = merged.loc[:, ['diseaseId', 'proteinSeq', 'diseaseDes', 'score']]\n",
    "\n",
    "# Save to a new file, omitting the index.\n",
    "result.to_csv('/nfs/dpa_pretrain/data/pretrain/differences_updated.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2fabefea-365f-4c2c-afd9-7404769c9b50",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the tab-separated gene table and report its row count.\n",
    "df1 = pd.read_csv('gene_associations.tsv', delimiter='\\t')\n",
    "print(f\"the number of gene: {len(df1.index)}\")\n",
    "\n",
    "# Load the tab-separated disease table and report its row count.\n",
    "df2 = pd.read_csv('disease_associations.tsv', delimiter='\\t')\n",
    "print(f\"the number of disease: {len(df2.index)}\")\n",
    "\n",
    "# Place the two tables side by side; rows are paired by index label only.\n",
    "# NOTE(review): the column listings in this notebook show NofPmids in both tables, so\n",
    "# the result carries a duplicated column name, and nothing here links a gene row to the\n",
    "# disease row it is paired with -- confirm this column-wise concat is what is wanted.\n",
    "df = pd.concat((df1, df2), axis='columns')\n",
    "\n",
    "# Saving the combined table is currently disabled:\n",
    "# df.to_csv('all_gene_disease_associations.tsv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "1d751c6d-2c66-457d-a6a4-ce62ab818d7c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>geneId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>DSI</th>\n",
       "      <th>DPI</th>\n",
       "      <th>PLI</th>\n",
       "      <th>protein_class_name</th>\n",
       "      <th>protein_class</th>\n",
       "      <th>NofDiseases</th>\n",
       "      <th>NofPmids</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>4.991700e-09</td>\n",
       "      <td>Receptor</td>\n",
       "      <td>DTO_05007575</td>\n",
       "      <td>27</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>A2M</td>\n",
       "      <td>0.529</td>\n",
       "      <td>0.769</td>\n",
       "      <td>4.522900e-11</td>\n",
       "      <td>Enzyme modulator</td>\n",
       "      <td>DTO_05007584</td>\n",
       "      <td>147</td>\n",
       "      <td>145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>A2MP1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9</td>\n",
       "      <td>NAT1</td>\n",
       "      <td>0.536</td>\n",
       "      <td>0.846</td>\n",
       "      <td>1.929400e-14</td>\n",
       "      <td>Enzyme</td>\n",
       "      <td>DTO_05007624</td>\n",
       "      <td>133</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10</td>\n",
       "      <td>NAT2</td>\n",
       "      <td>0.451</td>\n",
       "      <td>0.885</td>\n",
       "      <td>3.274400e-06</td>\n",
       "      <td>Enzyme</td>\n",
       "      <td>DTO_05007624</td>\n",
       "      <td>311</td>\n",
       "      <td>627</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   geneId geneSymbol    DSI    DPI           PLI protein_class_name  \\\n",
       "0       1       A1BG  0.700  0.538  4.991700e-09           Receptor   \n",
       "1       2        A2M  0.529  0.769  4.522900e-11   Enzyme modulator   \n",
       "2       3      A2MP1    NaN    NaN           NaN                NaN   \n",
       "3       9       NAT1  0.536  0.846  1.929400e-14             Enzyme   \n",
       "4      10       NAT2  0.451  0.885  3.274400e-06             Enzyme   \n",
       "\n",
       "  protein_class  NofDiseases  NofPmids  \n",
       "0  DTO_05007575           27        20  \n",
       "1  DTO_05007584          147       145  \n",
       "2           NaN            1         1  \n",
       "3  DTO_05007624          133       184  \n",
       "4  DTO_05007624          311       627  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-read the tab-separated gene table and preview its first five rows.\n",
    "df1 = pd.read_csv('gene_associations.tsv', delimiter='\\t')\n",
    "df1.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6e398efa-85d1-4c52-bebb-5370e52e1d87",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['geneId', 'geneSymbol', 'DSI', 'DPI', 'PLI', 'protein_class_name',\n",
       "       'protein_class', 'NofDiseases', 'NofPmids'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the column labels of df1 (the table read from gene_associations.tsv).\n",
    "df1.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "08b6684d-41cf-4a05-bbc0-84849fb1c077",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['diseaseId', 'diseaseName', 'diseaseType', 'diseaseClass',\n",
       "       'diseaseSemanticType', 'NofGenes', 'NofPmids'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the column labels of df2 (the table read from disease_associations.tsv).\n",
    "df2.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2cd016c9-30cb-4966-a5c2-e55261f55adb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseName</th>\n",
       "      <th>diseaseType</th>\n",
       "      <th>diseaseClass</th>\n",
       "      <th>diseaseSemanticType</th>\n",
       "      <th>NofGenes</th>\n",
       "      <th>NofPmids</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C0000727</td>\n",
       "      <td>Abdomen, Acute</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C23</td>\n",
       "      <td>Sign or Symptom</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C0000729</td>\n",
       "      <td>Abdominal Cramps</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C16</td>\n",
       "      <td>Sign or Symptom</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C0000731</td>\n",
       "      <td>Abdomen distended</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C06</td>\n",
       "      <td>Finding</td>\n",
       "      <td>103</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  diseaseId        diseaseName diseaseType diseaseClass diseaseSemanticType  \\\n",
       "0  C0000727     Abdomen, Acute   phenotype          C23     Sign or Symptom   \n",
       "1  C0000729   Abdominal Cramps   phenotype          C16     Sign or Symptom   \n",
       "2  C0000731  Abdomen distended   phenotype          C06             Finding   \n",
       "\n",
       "   NofGenes  NofPmids  \n",
       "0         2         2  \n",
       "1         1         1  \n",
       "2       103         0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first three rows of df2.\n",
    "df2.head(n=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac95fa22-9cac-49c6-8a63-55d74b8e3446",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Found local copy...\n",
      "Loading...\n",
      "Done!\n",
      "Binariztion using threshold 0, default, we assume the smaller values are 1 and larger ones is 0, you can change the order by 'binarize(order = 'ascending')'\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAGkCAYAAAArG8+aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAA9hAAAPYQGoP6dpAABWRUlEQVR4nO3dd3hT9f4H8HeSNulO94IuoGwoG1kWEK1scAEKtiDoFRAB8Wr1Bwgq1atAnYDeK/MiKiKiDEHmFZC9REDKKIXSltI23WmbnN8fbSKhLXQkPTnJ+/U8eZ7kzE/T5ORzvlMmCIIAIiIiIgmSix0AERERUV0xkSEiIiLJYiJDREREksVEhoiIiCSLiQwRERFJFhMZIiIikiwmMkRERCRZTGSIiIhIspjIEBERkWQxkSGrdfXqVchkMnz44YdmO+aePXsgk8mwZ88esx3T4K233oJMJjP7cavSt29f9O3b1/ja8HetX7++Qc4fFxeH8PDwBjlXXeXn52PixIkIDAyETCbD9OnTa30MmUyGt956y/h6xYoVkMlkuHr1qtniJKL6YSJDZmW40B89elTsUOrF8HcYHk5OTggODkZMTAw+/vhj5OXlmeU8qampeOutt3Dy5EmzHM+crDm2mliwYAFWrFiBF198EatXr8a4cePEDsmqbdmyxSRpu5++ffsavx9yuRweHh5o0aIFxo0bhx07dtQrls8//xwrVqyo1zHMRerfA3vARIboHubPn4/Vq1djyZIleOmllwAA06dPR7t27XD69GmTbf/v//4PRUVFtTp+amoq5s2bV+uL5Pbt27F9+/Za7VNb94rtyy+/xIULFyx6/vratWsXHnjgAcydOxdjx45F586d633McePGoaioCGFhYWaI0Lps2bIF8+bNq9U+jRs3xurVq7Fq1Sp88MEHGDZsGA4cOIBHHnkEo0aNQmlpaZ1isbZEpi7fUWo4DmIHQGTNBg4ciC5duhhfx8fHY9euXRgyZAiGDRuGc+fOwdnZGQDg4OAABwfLfqUKCwvh4uICpVJp0fPcj6Ojo6jnr4mMjAy0bt3arMdUKBRQKBRmPaaUqdVqjB071mTZe++9h2nTpuHzzz9HeHg43n//fZGiI3vBEhlqcCUlJZgzZw46d+4MtVoNV1dX9OnTB7t37652n8WLFyMsLAzOzs6Ijo7GH3/8UWmb8+fP44knnoC3tzecnJzQpUsXbNq0yezx9+/fH7Nnz0ZycjLWrFljXF5VG5kdO3agd+/e8PT0hJubG1q0aIE33ngDQHm7lq5duwIAxo8fbyymN9yJ9u3bF23btsWxY8fw4IMPwsXFxbjv3W1kDHQ6Hd544w0EBgbC1dUVw4YNQ0pKisk24eHhiIuLq7Tvnce8X2xVtZEpKCjAK6+8gpCQEKhUKrRo0QIffvghBEEw2U4mk2Hq1KnYuHEj2rZtC5VKhTZt2mDbtm1Vv+F3ycjIwHPPPYeAgAA4OTkhKioKK1euNK43tBe6cuUKNm/ebIz9Xu1atFotZsyYAT8/P7i7u2PYsGG4fv16pe2qaiNz9OhRxMTEwNfXF87OzoiIiMCECRNM9rt9+zbGjRsHDw8PeHp6IjY2FqdOnTJ5Tw1q8jk2xLF//37MnDkTfn5+cHV1xciRI3Hr1q1KcW/duhV9+vSBq6sr3N3dMXjwYJw9e9a4Pi4uDp999hkAmFSp1oVCocDHH3+M1q1b49NPP4VGozGuW758Ofr37w9/f3+oVCq0bt0aS5YsMdk/PDwcZ8+exd69e41xGD6XWVlZmDVrFtq1awc3Nzd4eHhg4MCBOHXqVKU4PvnkE7Rp0wYuLi7w8vJCly5dsHbtWpNtbty4gQkTJiAgIMD4Ofzqq6+M6+/3PSDrwBIZanC5ubn497//jTFjxmDSpEnIy8vDf/7zH8TExODw4cPo0KGDyfarVq1CXl4epkyZguLiYnz00Ufo378/zpw5g4CAAADA2bNn
0atXLzRq1Aivv/46XF1d8e2332LEiBH4/vvvMXLkSLP+DePGjcMbb7yB7du3Y9KkSVVuc/bsWQwZMgTt27fH/PnzoVKpkJSUhP379wMAWrVqhfnz52POnDl4/vnn0adPHwBAz549jce4ffs2Bg4ciNGjR2Ps2LHGv7c67777LmQyGV577TVkZGQgMTERAwYMwMmTJ40lRzVRk9juJAgChg0bht27d+O5555Dhw4d8Msvv+DVV1/FjRs3sHjxYpPtf/vtN2zYsAGTJ0+Gu7s7Pv74Yzz++OO4du0afHx8qo2rqKgIffv2RVJSEqZOnYqIiAh89913iIuLQ05ODl5++WW0atUKq1evxowZM9C4cWO88sorAAA/P79qjztx4kSsWbMGTz/9NHr27Ildu3Zh8ODB932fMjIy8Mgjj8DPzw+vv/46PD09cfXqVWzYsMG4jV6vx9ChQ3H48GG8+OKLaNmyJX788UfExsZWOl5tP8cvvfQSvLy8MHfuXFy9ehWJiYmYOnUqvvnmG+M2q1evRmxsLGJiYvD++++jsLAQS5YsQe/evXHixAmEh4fjhRdeQGpqKnbs2IHVq1ff9+++H4VCgTFjxmD27Nn47bffjO/lkiVL0KZNGwwbNgwODg746aefMHnyZOj1ekyZMgUAkJiYiJdeeglubm548803AcD4ub98+TI2btyIJ598EhEREUhPT8eyZcsQHR2NP//8E8HBwQDKqz6nTZuGJ554Ai+//DKKi4tx+vRpHDp0CE8//TQAID09HQ888IAxsfbz88PWrVvx3HPPITc3F9OnT6/194BEIhCZ0fLlywUAwpEjR6rdpqysTNBqtSbLsrOzhYCAAGHChAnGZVeuXBEACM7OzsL169eNyw8dOiQAEGbMmGFc9tBDDwnt2rUTiouLjcv0er3Qs2dPITIy0rhs9+7dAgBh9+7d9f471Gq10LFjR+PruXPnCnd+pRYvXiwAEG7dulXtMY4cOSIAEJYvX15pXXR0tABAWLp0aZXroqOjK/1djRo1EnJzc43Lv/32WwGA8NFHHxmXhYWFCbGxsfc95r1ii42NFcLCwoyvN27cKAAQ3nnnHZPtnnjiCUEmkwlJSUnGZQAEpVJpsuzUqVMCAOGTTz6pdK47JSYmCgCENWvWGJeVlJQIPXr0ENzc3Ez+9rCwMGHw4MH3PJ4gCMLJkycFAMLkyZNNlj/99NMCAGHu3LnGZYbPxZUrVwRBEIQffvjhvp+T77//XgAgJCYmGpfpdDqhf//+ld7fmn6ODXEMGDBA0Ov1xuUzZswQFAqFkJOTIwiCIOTl5Qmenp7CpEmTTGJKS0sT1Gq1yfIpU6YItflJiI6OFtq0aVPtesN7c+dnr7CwsNJ2MTExQpMmTUyWtWnTxuSzaFBcXCzodDqTZVeuXBFUKpUwf/5847Lhw4ffMzZBEITnnntOCAoKEjIzM02Wjx49WlCr1cZY7/U9IOvAqiVqcAqFwtjGQ6/XIysrC2VlZejSpQuOHz9eafsRI0agUaNGxtfdunVD9+7dsWXLFgDlxc27du3CU089hby8PGRmZiIzMxO3b99GTEwMLl68iBs3bpj973Bzc7tn7yVPT08AwI8//gi9Xl+nc6hUKowfP77G2z/77LNwd3c3vn7iiScQFBRkfK8sZcuWLVAoFJg2bZrJ8ldeeQWCIGDr1q0mywcMGICmTZsaX7dv3x4eHh64fPnyfc8TGBiIMWPGGJc5Ojpi2rRpyM/Px969e+sUO4BKsdeku7bhf/zzzz9X27B127ZtcHR0NCm5k8vlxhIIg7p8jp9//nmTKqA+ffpAp9MhOTkZQHnVZk5ODsaMGWM8XmZmJhQKBbp3737P6tz6cnNzAwCT78idpYIajQaZmZmIjo7G5cuXTaqgqqNSqSCXl/9s6XQ63L5921hle+e1w9PTE9evX8eRI0eqPI4gCPj+++8xdOhQCIJg8t7ExMRAo9FUeS0i68REhkSxcuVKtG/f
Hk5OTvDx8YGfnx82b95c5cUsMjKy0rLmzZsb2ykkJSVBEATMnj0bfn5+Jo+5c+cCKK8CMLf8/HyTpOFuo0aNQq9evTBx4kQEBARg9OjR+Pbbb2uV1DRq1KhWDXvvfq9kMhmaNWtm8XFPkpOTERwcXOn9aNWqlXH9nUJDQysdw8vLC9nZ2fc9T2RkpPHH7H7nqWnscrncJLECgBYtWtx33+joaDz++OOYN28efH19MXz4cCxfvhxardbk+EFBQXBxcTHZt1mzZiav6/I5vvt99PLyAgDj+3jx4kUA5e267j7m9u3bLfK9MMjPzwcAk8/E/v37MWDAALi6usLT0xN+fn7Gdl81SWT0ej0WL16MyMhIqFQq+Pr6ws/PD6dPnzbZ/7XXXoObmxu6deuGyMhITJkyxVilCwC3bt1CTk4Ovvjii0rvi+HGwZLvDZkX28hQg1uzZg3i4uIwYsQIvPrqq/D394dCoUBCQgIuXbpU6+MZEoNZs2YhJiamym3u/tGor+vXr0Oj0dzzuM7Ozti3bx92796NzZs3Y9u2bfjmm2/Qv39/bN++vUa9X2rTrqWmqmvEqdPpGqxHTnXnEe5qGGztDIMQ/v777/jpp5/wyy+/YMKECVi4cCF+//13Y6lETdTlc3y/99FwzNWrVyMwMLDSdpbsZWdokG+I+dKlS3jooYfQsmVLLFq0CCEhIVAqldiyZQsWL15cowR/wYIFmD17NiZMmIC3334b3t7ekMvlmD59usn+rVq1woULF/Dzzz9j27Zt+P777/H5559jzpw5mDdvnnHbsWPHVtlWCSgvJSRpYCJDDW79+vVo0qQJNmzYYPKjarjrvJvhrvJOf/31l7HXTJMmTQCUVzEMGDDA/AFXwdAgsrofHAO5XI6HHnoIDz30EBYtWoQFCxbgzTffxO7duzFgwACzjwR893slCAKSkpJMLspeXl7IycmptG9ycrLxvQSqT3iqEhYWhl9//RV5eXkmd+Dnz583rjeHsLAwnD59Gnq93qRUpj7nCQsLg16vx6VLl0xKYWozTs4DDzyABx54AO+++y7Wrl2LZ555BuvWrcPEiRMRFhaG3bt3G7vOGyQlJZkcwxKfY0Mpk7+//32Pac7Pok6nw9q1a+Hi4oLevXsDAH766SdotVps2rTJpCSpquqt6mJZv349+vXrh//85z8my3NycuDr62uyzNXVFaNGjcKoUaNQUlKCxx57DO+++y7i4+ONvdN0Ol2Dvi9kGaxaogZnuIu88+770KFDOHjwYJXbb9y40aRtwOHDh3Ho0CEMHDgQQPlFum/fvli2bBlu3rxZaf+quqPWx65du/D2228jIiICzzzzTLXbZWVlVVpm6JFlqHpwdXUFgCoTi7ow9PAyWL9+PW7evGl8r4DyH7fff/8dJSUlxmU///xzpW7atYlt0KBB0Ol0+PTTT02WL168GDKZzOT89TFo0CCkpaWZ9MopKyvDJ598Ajc3N0RHR9f6mIbYPv74Y5PliYmJ9903Ozu7UinS3f/jmJgYlJaW4ssvvzRuo9frjd2dDSzxOY6JiYGHhwcWLFhQZRueO49prs+iTqfDtGnTcO7cOUybNg0eHh4Aqv7eazQaLF++vNIxXF1dq4xDoVBUer+/++67Sm2Hbt++bfJaqVSidevWEAQBpaWlUCgUePzxx/H9999XOZSDJd4XshyWyJBFfPXVV1WOC/Lyyy9jyJAh2LBhA0aOHInBgwfjypUrWLp0KVq3bm2sV79Ts2bN0Lt3b7z44ovQarVITEyEj48P/vnPfxq3+eyzz9C7d2+0a9cOkyZNQpMmTZCeno6DBw/i+vXrVY4zURNbt27F+fPnUVZWhvT0dOzatQs7duxAWFgYNm3aBCcnp2r3nT9/Pvbt24fBgwcjLCwMGRkZ+Pzzz9G4cWPjXWrTpk3h6emJpUuXwt3dHa6urujevTsiIiLqFK+3tzd69+6N8ePHIz09HYmJiWjWrJlJQ9OJEydi/fr1
ePTRR/HUU0/h0qVLWLNmTaU2IrWJbejQoejXrx/efPNNXL16FVFRUdi+fTt+/PFHTJ8+vdKx6+r555/HsmXLEBcXh2PHjiE8PBzr16/H/v37kZiYeM82S9Xp0KEDxowZg88//xwajQY9e/bEzp07K5WYVGXlypX4/PPPMXLkSDRt2hR5eXn48ssv4eHhgUGDBgEob6zerVs3vPLKK0hKSkLLli2xadMmY6J75x2/uT/HHh4eWLJkCcaNG4dOnTph9OjR8PPzw7Vr17B582b06tXLmHwaRj6eNm0aYmJioFAoMHr06HseX6PRGMdSKiwsRFJSEjZs2IBLly5h9OjRePvtt43bPvLII1AqlRg6dCheeOEF5Ofn48svv4S/v3+lxK1z585YsmQJ3nnnHTRr1gz+/v7o378/hgwZgvnz52P8+PHo2bMnzpw5g//+978mJYmGcwUGBqJXr14ICAjAuXPn8Omnn2Lw4MHGz8h7772H3bt3o3v37pg0aRJat26NrKwsHD9+HL/++qvx/2Pu7yhZgCh9pchmGbqFVvdISUkR9Hq9sGDBAiEsLExQqVRCx44dhZ9//rlSl15D9+sPPvhAWLhwoRASEiKoVCqhT58+wqlTpyqd+9KlS8Kzzz4rBAYGCo6OjkKjRo2EIUOGCOvXrzduU9vu14aHUqkUAgMDhYcfflj46KOPTLr5Gtzd/Xrnzp3C8OHDheDgYEGpVArBwcHCmDFjhL/++stkvx9//FFo3bq14ODgYNLN817dW6vrfv31118L8fHxgr+/v+Ds7CwMHjxYSE5OrrT/woULhUaNGgkqlUro1auXcPTo0UrHvFdsd/+vBKG8q++MGTOE4OBgwdHRUYiMjBQ++OADk+7BglDe/XrKlCmVYqquW/jd0tPThfHjxwu+vr6CUqkU2rVrV2XX2Jp2vxYEQSgqKhKmTZsm+Pj4CK6ursLQoUOFlJSU+3a/Pn78uDBmzBghNDRUUKlUgr+/vzBkyBDh6NGjJse/deuW8PTTTwvu7u6CWq0W4uLihP379wsAhHXr1plsW5PPcXXDA1T3+d69e7cQExMjqNVqwcnJSWjatKkQFxdnEmdZWZnw0ksvCX5+foJMJrtvV2zD8ACGh5ubmxAZGSmMHTtW2L59e5X7bNq0SWjfvr3g5OQkhIeHC++//77w1VdfmbynglDePXzw4MGCu7u7AMD4uSwuLhZeeeUVISgoSHB2dhZ69eolHDx4sNJnd9myZcKDDz4o+Pj4CCqVSmjatKnw6quvChqNxiSe9PR0YcqUKUJISIjg6OgoBAYGCg899JDwxRdfmGxX3feArINMECTWuo6IyAZs3LgRI0eOxG+//YZevXqJHQ6RZDGRISKysKKiIpMeaDqdDo888giOHj2KtLQ0i/ROI7IXbCNDRGRhL730EoqKitCjRw9otVps2LABBw4cwIIFC5jEENUTS2SIiCxs7dq1WLhwIZKSklBcXIxmzZrhxRdfxNSpU8UOjUjymMgQERGRZHEcGSIiIpIsq09k9u3bh6FDhyI4OBgymQwbN24UOyQiIiKyElbf2LegoABRUVGYMGECHnvssVrvr9frkZqaCnd3dw41TUREJBGCICAvLw/BwcGVJoq9k9UnMgMHDqzX8OapqakICQkxY0RERETUUFJSUtC4ceNq11t9IlNbWq3WOMcJ8Pe8HikpKcY5P8wlIyMDubm5Zj2mNdJqtUhPTxc7DDKzgIAAqFQqscOwKA8PD/j7+4sdBhHVQW5uLkJCQu479YjNJTIJCQmYN29epeUeHh5mTWTS09Px4uQpKC3R3n9jIhKFo1KFNatXISAgQOxQiKiO7tcsxOYSmfj4eMycOdP42pDRmZtGo0FpiRZFTaKhd1Kb/fhEVD/yYg1weS80Gk2DJTKZmkI0ntcVAHB97hH4ql0a5LxE9szmEhmVStWgxeV6JzX0rr4Ndj4isl56vQCt+k/jcyKyPKvvfk1ERERUHasvkcnP
z0dSUpLx9ZUrV3Dy5El4e3sjNDRUxMiIiIhIbFafyBw9ehT9+vUzvja0f4mNjcWKFStEioqIiIisgdUnMn379gWngyIiIqKqsI0MERERSZbVl8gQEUmFXC6DIi/M+JyILI+JDBGRmfiqXVD24VWxwyCyK6xaIiIiIsliIkNERESSxUSGiMhMsnKL4DqjK1xndEVWbpHY4RDZBbaRISIykzKdHoWeR43PicjyWCJDREREksVEhoiIiCSLiQwRERFJFhMZIiIikiwmMkRERCRZ7LVUT/KiHLFDIKIqiPXdlBX5inJeInvFRKaenK/sEzsEIrIS/l6u0L93S+wwiOwKE5l6Kop4EHpnT7HDIKK7yItyeKNBZAeYyNST3tkTelcWJRMREYmBjX2JiMwkK7cIntP7wnN6X05RQNRAWCJDRGQmZTo9NF57jc+JyPJYIkNERESSxUSGiIiIJIuJDBEREUkWExkiIiKSLCYyREREJFnstUREZE4lLmJHQGRXmMgQEZmJv5crhHcLxA6DyK6waomIiIgki4kMERERSRYTGSIiM8nJL4b/jMHwnzEYOfnFYodDZBfYRoaIyExKSnW45bnF+JyILE8SJTKfffYZwsPD4eTkhO7du+Pw4cNih0RERERWwOoTmW+++QYzZ87E3Llzcfz4cURFRSEmJgYZGRlih0ZEREQis/pEZtGiRZg0aRLGjx+P1q1bY+nSpXBxccFXX30ldmhEREQkMqtuI1NSUoJjx44hPj7euEwul2PAgAE4ePBglftotVpotVrj69zcXIvGKC/WWPT4RFQ3/G4S2QerTmQyMzOh0+kQEBBgsjwgIADnz5+vcp+EhATMmzfP4rGp1Wo4KlXA5b0WPxcR1Y2jUgW1Wi12GERkQVadyNRFfHw8Zs6caXyt0WgQGhpq9pIZZ2dnLPn8M4uX+FgDrVaL9PR0scMgMwsICIBKpRI7DIvy8PCAs7Nzg31P83ILgGLD81w4KdhziaiuDN9bQRDuuZ1VJzK+vr5QKBSVfkTT09MRGBhY5T4qlcrk4mx4I0JCQiwXKBHRXZq9Fyx2CEQ2IS8v754lq1adyCiVSnTu3Bk7d+7EiBEjAAB6vR47d+7E1KlTa3SM4OBgpKSkwN3dHTKZzILRktTl5uYiJCQEKSkp8PDwEDscIiK7JggC8vLyEBx875sCq05kAGDmzJmIjY1Fly5d0K1bNyQmJqKgoADjx4+v0f5yuRyNGze2cJRkSzw8PJjIEBFZgZq0cbP6RGbUqFG4desW5syZg7S0NHTo0AHbtm2r1ACYiIiI7I9MuF8rGiI7kZubC7VaDY1GwxIZIiKJsPoB8Ygaikqlwty5c22+Jw8RkS1hiQwRERFJFktkiIiISLKYyBAREZFkMZEhIiIiyWIiQ0RERJLFRIaIiIgki4kMERERSRYTGSIiIpIsJjJEREQkWUxkiIiISLKYyBAREZFkMZEhIiIiyWIiQ0RERJLFRIaIiIgki4kMERERSRYTGSIiIpIsJjJEREQkWUxkiIiISLKYyBAREZFkMZEhIiIiyWIiQ0RERJLFRIaIiIgki4kMERERSRYTGSIiIpIsJjJEREQkWUxkiIiISLIcxA7A0vR6PVJTU+Hu7g6ZTCZ2OERERFQDgiAgLy8PwcHBkMurL3ex+UQmNTUVISEhYodBREREdZCSkoLGjRtXu97mExl3d3cA5W+Eh4eHyNEQkS27lV2AZkuDAQBJ/0iFn5eryBERSVdubi5CQkKMv+PVsflExlCd5OHhwUSGiCyqWKcAnMqfu3t4wMODiQxRfd2vWQgb+xIREZFkMZEhIiIiybL5qiUiooYkK/IVOwQiu8JEpoJOp0NpaanYYdg1pVJ5zy52RNbO38sV+vduiR0GkV2x+0RGEASkpaUhJydH7FDsnlwuR0REBJRKpdihEBGRRNh9ImNIYvz9/eHi4sJB80RiGLjw5s2bCA0N5f+BiIhqxK4TGZ1OZ0xifHx8xA7H
7vn5+SE1NRVlZWVwdHQUOxyiWsvKLUKTOQMBAJfnb4W3h7PIERHZPlEbJOzbtw9Dhw5FcHAwZDIZNm7caFxXWlqK1157De3atYOrqyuCg4Px7LPPIjU11WznN7SJcXFxMdsxqe4MVUo6nU7kSIjqpkynh8ZrLzRee1Gm04sdDpFdEDWRKSgoQFRUFD777LNK6woLC3H8+HHMnj0bx48fx4YNG3DhwgUMGzbM7HGwGsM68P9ARES1JWrV0sCBAzFw4MAq16nVauzYscNk2aeffopu3brh2rVrCA0NbYgQiYiIyIpJqq+rRqOBTCaDp6dntdtotVrk5uaaPKj29uzZA5lMZuzNtWLFinu+70RERGKQTGPf4uJivPbaaxgzZsw950xKSEjAvHnzGjAyccTFxWHlypV44YUXsHTpUpN1U6ZMweeff47Y2FisWLHCLOcbNWoUBg0aZJZjWbO1h67Vavunu7NkkIjuj9cWy5FEiUxpaSmeeuopCIKAJUuW3HPb+Ph4aDQa4yMlJaWBomx4ISEhWLduHYqKiozLiouLsXbtWrNXvTk7O8Pf39+sxyQiIqovq09kDElMcnIyduzYcd8ZrFUqlXGma1uf8bpTp04ICQnBhg0bjMs2bNiA0NBQdOzY0bhMr9cjISEBERERcHZ2RlRUFNavX29yrC1btqB58+ZwdnZGv379cPXqVZP1d1ctXbp0CcOHD0dAQADc3NzQtWtX/Prrryb7hIeHY8GCBZgwYQLc3d0RGhqKL774wnxvAJE1KnEpfxBRg7DqRMaQxFy8eBG//vqrxcd6EQQBhSVlojwEQahTzBMmTMDy5cuNr7/66iuMHz/eZJuEhASsWrUKS5cuxdmzZzFjxgyMHTsWe/fuBQCkpKTgsccew9ChQ3Hy5ElMnDgRr7/++j3Pm5+fj0GDBmHnzp04ceIEHn30UQwdOhTXrpkWny5cuBBdunTBiRMnMHnyZLz44ou4cOFCnf5WImvn7+UK4d0CCO8WwN/LVexwiOyCqG1k8vPzkZSUZHx95coVnDx5Et7e3ggKCsITTzyB48eP4+eff4ZOp0NaWhoAwNvb2yLD2BeV6tB6zi9mP25N/Dk/Bi7K2v87xo4di/j4eCQnJwMA9u/fj3Xr1mHPnj0Ayhs/L1iwAL/++it69OgBAGjSpAl+++03LFu2DNHR0ViyZAmaNm2KhQsXAgBatGiBM2fO4P3336/2vFFRUYiKijK+fvvtt/HDDz9g06ZNmDp1qnH5oEGDMHnyZADAa6+9hsWLF2P37t1o0aJFrf9WIiKiu4mayBw9ehT9+vUzvp45cyYAIDY2Fm+99RY2bdoEAOjQoYPJfrt370bfvn0bKkyr5ufnh8GDB2PFihUQBAGDBw+Gr+/fs+8mJSWhsLAQDz/8sMl+JSUlxuqnc+fOoXv37ibrDUlPdfLz8/HWW29h8+bNuHnzJsrKylBUVFSpRKZ9+/bG5zKZDIGBgcjIyKjT30pERHQ3UROZvn373rNKpa7VLXXl7KjAn/NjGvScd567riZMmGAsBbl7cMH8/HwAwObNm9GoUSOTdSqVqs7nnDVrFnbs2IEPP/wQzZo1g7OzM5544gmUlJSYbHf3VAMymQx6PUc8JduUk1+M5rMfBwD89fb38HRzEjkiItsnme7XDUEmk9Wpekdsjz76KEpKSiCTyRATY5qItW7dGiqVCteuXUN0dHSV+7dq1cpY+mXw+++/3/Oc+/fvR1xcHEaOHAmgPGG6u4Ewkb0pKdXhlucW43Misjzp/WpTJQqFAufOnTM+v5O7uztmzZqFGTNmQK/Xo3fv3tBoNNi/fz88PDwQGxuLf/zjH1i4cCFeffVVTJw4EceOHbvv+DORkZHYsGEDhg4dCplMhtmzZ7OkhYiIGpxV91qimrtXV/O3334bs2fPRkJCAlq1aoVHH30UmzdvRkREBAAgNDQU33//PTZu3IioqCgsXboUCxYsuOf5Fi1aBC8vL/Ts2RND
hw5FTEwMOnXqZPa/i4iI6F5kQkM3RGlgubm5UKvV0Gg0lX7oi4uLceXKFURERMDJiXXZYrOG/wdH36T6yMguQMDHbgCA9Gn57IJNRry21N69fr/vxBIZIiIikiwmMkRERCRZTGSIiIhIsthriYjITPy9XCHMtelmh0RWhyUyREREJFlMZIiIiEiymMgQEZlJTn4xGs98Eo1nPomc/GKxwyGyC2wjQ0RkJiWlOtxQr694vkLcYIjsBEtkiIiISLKYyBAREZFkMZGRqLi4OMhkMvzjH/+otG7KlCmQyWSIi4tr+MCIiIgaEBMZCQsJCcG6detQVFRkXFZcXIy1a9ciNJTzdBARke1jIiNhnTp1QkhICDZs2GBctmHDBoSGhqJjx47GZXq9HgkJCYiIiICzszOioqKwfv1643qdTofnnnvOuL5Fixb46KOPTM4VFxeHESNG4MMPP0RQUBB8fHwwZcoUlJaWWv4PJSIiqgZ7LVWhoKD6dQoFcOfEzPfaVi4HnJ3vv61rPSbInTBhApYvX45nnnkGAPDVV19h/Pjx2LNnj3GbhIQErFmzBkuXLkVkZCT27duHsWPHws/PD9HR0dDr9WjcuDG+++47+Pj44MCBA3j++ecRFBSEp556ynic3bt3IygoCLt370ZSUhJGjRqFDh06YNKkSXX/A4iIiOqBiUwV3NyqXzdoELB589+v/f2BwsKqt42OBu7IJxAeDmRmVt5OqMeI5mPHjkV8fDySk5MBAPv378e6deuMiYxWq8WCBQvw66+/okePHgCAJk2a4LfffsOyZcsQHR0NR0dHzJs3z3jMiIgIHDx4EN9++61JIuPl5YVPP/0UCoUCLVu2xODBg7Fz504mMkQVfNUuSJ+Wb3xORJbHREbi/Pz8MHjwYKxYsQKCIGDw4MHw9fU1rk9KSkJhYSEefvhhk/1KSkpMqp8+++wzfPXVV7h27RqKiopQUlKCDh06mOzTpk0bKBQK4+ugoCCcOXPGMn8YkQTJ5TL4e9WjiJWIao2JTBXy86tfd8fvOAAgI6P6beV3tUC6erXOId3ThAkTMHXqVADlCcmd8iv+mM2bN6NRo0Ym61QqFQBg3bp1mDVrFhYuXIgePXrA3d0dH3zwAQ4dOmSyvaOjo8lrmUwGvV5v1r+FiIioNpjIVKE2bVYstW1tPProoygpKYFMJkNMTIzJutatW0OlUuHatWuIjo6ucv/9+/ejZ8+emDx5snHZpUuXLBMskQ3LLdCi09wXAADH5y2Dh6tK5IiIbB8TGRugUChw7tw54/M7ubu7Y9asWZgxYwb0ej169+4NjUaD/fv3w8PDA7GxsYiMjMSqVavwyy+/ICIiAqtXr8aRI0cQEREhxp9DJFnFJWW45L6y4vlnTGSIGgATGRvh4eFR7bq3334bfn5+SEhIwOXLl+Hp6YlOnTrhjTfeAAC88MILOHHiBEaNGgWZTIYxY8Zg8uTJ2Lp1a0OFT0REVCcyQahPnxnrl5ubC7VaDY1GU+nHvri4GFeuXEFERASc7uxTTaKwhv/H2kPXarX909058CD9LSO7AAEfl3d7TJ+Wz4a/ZMRrS+3d6/f7ThwQj4iIiCSLiQwRERFJFhMZIiIikiwmMkRERCRZ7LVERGQmvmoX/Dk+w/iciCyPiQzA0WmthI13oCM7IJfL0CrUT+wwiOyKXScySqUScrkcqamp8PPzg1KphEwmEzssuyQIAm7dugWZTFZpKgQiIqLq2HUiI5fLERERgZs3byI1NVXscOyeTCZD48aNK41OTCQVuQVa9Jo/EwCwf84ijuxL1ADsOpEByktlQkNDUVZWBp1OJ3Y4ds3R0ZFJDElacUkZ/nD5vOL5v5jIEDUAu09kABirM1ilQUREJC3sfk1ERESSJWois2/fPgwdOhTBwcGQyWTYuHGjyXpBEDBnzhwEBQXB2dkZAwYMwMWLF8UJloiIiKyOqIlMQUEBoqKi8Nlnn1W5/l//+hc+/vhjLF26FIcO
HYKrqytiYmJQXFzcwJESERGRNRK1jczAgQMxcODAKtcJgoDExET83//9H4YPHw4AWLVqFQICArBx40aMHj26IUMlIiIiK2S1bWSuXLmCtLQ0DBgwwLhMrVaje/fuOHjwYLX7abVa5ObmmjyIiIjINlltr6W0tDQAQEBAgMnygIAA47qqJCQkYN68eRaNjYioKt4ezvjf41eMz4nI8qy2RKau4uPjodFojI+UlBSxQyIiO+GgkKN323D0bhsOB4XNXV6JrJLVftMCAwMBAOnp6SbL09PTjeuqolKp4OHhYfIgIiIi22S1iUxERAQCAwOxc+dO47Lc3FwcOnQIPXr0EDEyIqKq5ReVoOubr6Lrm68iv6hE7HCI7IKobWTy8/ORlJRkfH3lyhWcPHkS3t7eCA0NxfTp0/HOO+8gMjISERERmD17NoKDgzFixAjxgiYiqkZhcSmOKj+seP4W3JyVIkdEZPtETWSOHj2Kfv36GV/PnFk+2VpsbCxWrFiBf/7znygoKMDzzz+PnJwc9O7dG9u2bYOTk5NYIRMREZEVETWR6du3LwRBqHa9TCbD/PnzMX/+/AaMioiIiKTCatvIEBEREd0PExkiIiKSLCYyREREJFlMZIiIiEiyrHaKAiIiqfH2cMbGh/8wPiciy2MiQ0RkJg4KOYb3bCN2GER2hVVLREREJFkskSEiMpP8ohIMeX8BAODn197gyL5EDYCJDBGRmRQWl2KvbF7F81eZyBA1AFYtERERkWQxkSEiIiLJYiJDREREksVEhoiIiCSLiQwRERFJFhMZIiIikix2vyYiMhNPNyes6HnY+JyILI+JDBGRmSgdFYh9uKvYYRDZFVYtERERkWSxRIaIyEzyi0owavFHAIBvZrzMkX2JGgATGSIiMyksLsWW0n9WPJ/MRIaoAbBqiYiIiCSLiQwRERFJFhMZIiIikiwmMkRERCRZTGSIiIhIspjIEBERkWSx+zURkZl4ujlhcdRu43MisjwmMkREZqJ0VGD6iL5ih0FkV1i1RERERJLFEhkiIjMpLC5F3KdfAABWTH0eLk6OIkdEZPtYIkNEZCb5RSX4rmAqviuYivyiErHDIbILTGSIiIhIspjIEBERkWQxkSEiIiLJYiJDREREkmXViYxOp8Ps2bMREREBZ2dnNG3aFG+//TYEQRA7NCIiIrICVt39+v3338eSJUuwcuVKtGnTBkePHsX48eOhVqsxbdo0scMjIiIikVl1InPgwAEMHz4cgwcPBgCEh4fj66+/xuHDh0WOjIioMg9XFeY2+9n4nIgsz6oTmZ49e+KLL77AX3/9hebNm+PUqVP47bffsGjRomr30Wq10Gq1xte5ubkNESoREZyUDnjrmcFih0FkV6w6kXn99deRm5uLli1bQqFQQKfT4d1338UzzzxT7T4JCQmYN29eA0ZJREREYrHqxr7ffvst/vvf/2Lt2rU4fvw4Vq5ciQ8//BArV66sdp/4+HhoNBrjIyUlpQEjJiJ7VlhciomfrsDET1egsLhU7HCI7IJVl8i8+uqreP311zF69GgAQLt27ZCcnIyEhATExsZWuY9KpYJKxbppImp4+UUl+M/t8QCABUVPcq4logZg1SUyhYWFkMtNQ1QoFNDr9SJFRERERNbEqktkhg4dinfffRehoaFo06YNTpw4gUWLFmHChAlih0ZERERWwKoTmU8++QSzZ8/G5MmTkZGRgeDgYLzwwguYM2eO2KERERGRFbDqRMbd3R2JiYlITEwUOxQiIiKyQlbdRoaIiIjoXpjIEBERkWRZddUSEZGUeLiqMKPxt8bnRGR5TGSIiMzESemARc89KXYYRHaFVUtEREQkWSyRISIyk+KSMryx+gcAwIJxI+Gk5CWWyNL4LSMiMpPcAi0WX38KAPB6QT4TGaIGwKolIiIikiwmMkRERCRZdUpkLl++bO44iIiIiGqtTolMs2bN0K9fP6xZswbFxcXmjomIiIioRuqUyBw/fhzt
27fHzJkzERgYiBdeeAGHDx82d2xERERE91SnRKZDhw746KOPkJqaiq+++go3b95E79690bZtWyxatAi3bt0yd5xEREREldSrsa+DgwMee+wxfPfdd3j//feRlJSEWbNmISQkBM8++yxu3rxprjiJiKyem7MSz/ksx3M+y+HmrBQ7HCK7UK9E5ujRo5g8eTKCgoKwaNEizJo1C5cuXcKOHTuQmpqK4cOHmytOIiKr5+LkiH9PjcO/p8bBxclR7HCI7EKdRmtatGgRli9fjgsXLmDQoEFYtWoVBg0aBLm8PC+KiIjAihUrEB4ebs5YiYiIiEzUKZFZsmQJJkyYgLi4OAQFBVW5jb+/P/7zn//UKzgiIikpLinDe9/9AgB4/ckYjuxL1ADq9C3bsWMHQkNDjSUwBoIgICUlBaGhoVAqlYiNjTVLkEREUpBboMW8pCEAgMmcooCoQdSpjUzTpk2RmZlZaXlWVhYiIiLqHRQRERFRTdQpkREEocrl+fn5cHJyqldARERERDVVq3LPmTNnAgBkMhnmzJkDFxcX4zqdTodDhw6hQ4cOZg2QiIiIqDq1SmROnDgBoLxE5syZM1Aq/x4nQalUIioqCrNmzTJvhERERETVqFUis3v3bgDA+PHj8dFHH8HDw8MiQRERERHVRJ2a1C9fvtzccRARERHVWo0TmcceewwrVqyAh4cHHnvssXtuu2HDhnoHRkQkNW7OSjzp+qnxORFZXo0TGbVaDZlMZnxORESmXJwc8e2sKWKHQWRXapzI3FmdxKolIiIisgZ1GkemqKgIhYWFxtfJyclITEzE9u3bzRYYEZHUlJTqkLhxDxI37kFJqU7scIjsQp0SmeHDh2PVqlUAgJycHHTr1g0LFy7E8OHDsWTJErMGSEQkFTn5xZhxqh9mnOqHnPxiscMhsgt1SmSOHz+OPn36AADWr1+PwMBAJCcnY9WqVfj444/NGiARERFRdeqUyBQWFsLd3R0AsH37djz22GOQy+V44IEHkJycbNYAiYiIiKpTp0SmWbNm2LhxI1JSUvDLL7/gkUceAQBkZGRwkDwiIiJqMHVKZObMmYNZs2YhPDwc3bt3R48ePQCUl8507NjRrAESERERVadOI/s+8cQT6N27N27evImoqCjj8oceeggjR440W3BERERE91KnRAYAAgMDERgYaLKsW7du9Q6IiIiIqKbqlMgUFBTgvffew86dO5GRkQG9Xm+y/vLly2YJDgBu3LiB1157DVu3bkVhYSGaNWuG5cuXo0uXLmY7BxGRObg4OWKQ47+Mz4nI8uqUyEycOBF79+7FuHHjEBQUZJy6wNyys7PRq1cv9OvXD1u3boWfnx8uXrwILy8vi5yPiKg+3JyV2PzGq2KHQWRX6pTIbN26FZs3b0avXr3MHY+J999/HyEhISZTIkRERFj0nERERCQddeq15OXlBW9vb3PHUsmmTZvQpUsXPPnkk/D390fHjh3x5Zdf3nMfrVaL3NxckwcRUUMoKdVh5Y4jWLnjCKcoIGogdUpk3n77bcyZM8dkviVLuHz5MpYsWYLIyEj88ssvePHFFzFt2jSsXLmy2n0SEhKgVquNj5CQEIvGSERkkJNfjLgD3RB3oBunKCBqIHWqWlq4cCEuXbqEgIAAhIeHw9HRtFHb8ePHzRKcXq9Hly5dsGDBAgBAx44d8ccff2Dp0qWIjY2tcp/4+HjMnDnT+Do3N5fJDBERkY2qUyIzYsQIM4dRtaCgILRu3dpkWatWrfD9999Xu49KpYJKpbJ0aERERGQF6pTIzJ0719xxVKlXr164cOGCybK//voLYWFhDXJ+IiIism51aiMDADk5Ofj3v/+N+Ph4ZGVlASivUrpx44bZgpsxYwZ+//13LFiwAElJSVi7di2++OILTJkyxWznICIiIumqU4nM6dOnMWDAAKjValy9ehWTJk2Ct7c3NmzYgGvXrmHVqlVmCa5r16744YcfEB8fj/nz5yMiIgKJiYl4
5plnzHJ8IiIikrY6JTIzZ85EXFwc/vWvf8Hd3d24fNCgQXj66afNFhwADBkyBEOGDDHrMYmIiMg21CmROXLkCJYtW1ZpeaNGjZCWllbvoIiIpMjFyRHRwlzjcyKyvDolMiqVqsqB5v766y/4+fnVOygiIilyc1Ziz1tviR0GkV2pU2PfYcOGYf78+SgtLQUAyGQyXLt2Da+99hoef/xxswZIREREVJ06JTILFy5Efn4+/Pz8UFRUhOjoaDRr1gzu7u549913zR0jEZEklOn0+PHAWfx44CzKdHqxwyGyC3WqWlKr1dixYwf279+PU6dOIT8/H506dcKAAQPMHR8RkWRk5RZhxI62AID0Vvnw93IVOSIi21frREav12PFihXYsGEDrl69CplMhoiICAQGBkIQBMhkMkvESURERFRJraqWBEHAsGHDMHHiRNy4cQPt2rVDmzZtkJycjLi4OIwcOdJScRIRERFVUqsSmRUrVmDfvn3YuXMn+vXrZ7Ju165dGDFiBFatWoVnn33WrEESERERVaVWJTJff/013njjjUpJDAD0798fr7/+Ov773/+aLTgiIiKie6lVInP69Gk8+uij1a4fOHAgTp06Ve+giIiIiGqiVolMVlYWAgICql0fEBCA7OzsegdFREREVBO1aiOj0+ng4FD9LgqFAmVlZfUOiohIilycHNGlZJbxORFZXq0SGUEQEBcXB5VKVeV6rVZrlqCIiKTIzVmJI+9+IHYYRHalVolMbGzsfbdhjyUiIiJqKLVKZJYvX26pOIiIJK9Mp8fv564BAB5oFQoHRZ1mgSGiWqjTFAVERFRZVm4R+nwfAQBIn8YpCogaAm8XiIiISLKYyBAREZFkMZEhIiIiyWIiQ0RERJLFRIaIiIgki4kMERERSRa7XxMRmYmT0gFtCycbnxOR5fGbRkRkJh6uKpx5/zOxwyCyK6xaIiIiIsliiQwRkZno9QIuXM8EALRo7Au5XCZyRES2j4kMEZGZZGoK0Xq5PwBOUUDUUFi1RERERJLFRIaIiIgki4kMERERSRYTGSIiIpIsJjJEREQkWUxkiIiISLLY/ZqIyEyclA5omhdrfE5ElsdvGhGRmXi4qpD04QqxwyCyK6xaIiIiIsmSVCLz3nvvQSaTYfr06WKHQkRUiV4vICO7ABnZBdDrBbHDIbILkklkjhw5gmXLlqF9+/Zih0JEVKVMTSECPnZDwMduyNQUih0OkV2QRBuZ/Px8PPPMM/jyyy/xzjvviB0O2Zjb+Vqs+f0aDl7OxNXMQvRq5oOOoV6QyzjhHxE1HEEQcCIlB/uTMvHjyRt4oIkPxvUIg6+bSuzQrJokEpkpU6Zg8ODBGDBgwH0TGa1WC61Wa3ydm5tr6fBIwrIKSvDUsoO4dKvAuOz74zdwNDkbzz4QDmelQsToiMheFJfqsPr3ZFzJLL8W3dQU49CVLPx0KhXf/aMHfJjMVMvqq5bWrVuH48ePIyEhoUbbJyQkQK1WGx8hISEWjpCkqkBbhvHLD+PSrQIEejjh3ZFtEdMmEEoHOZJvF+LHUzcgCGznQESW99OpVFzJLIBSIccjrQOwYGQ7BKudcDmzALHLDyOvuFTsEK2WVScyKSkpePnll/Hf//4XTk5ONdonPj4eGo3G+EhJSbFwlCRV7209j1PXNfByccSaid3wTPcwRDf3w3O9IiCXAaeva3AyJUfsMInIxp2+noMTKTmQARjfKxx9W/jj6e6hWD2xO3xclfjjRi4WbDkvdphWy6oTmWPHjiEjIwOdOnWCg4MDHBwcsHfvXnz88cdwcHCATqertI9KpYKHh4fJg+huVzML8PXhawCAT8Z0QjN/d+O6EG8X9G/pDwDYdCoV+doyUWIkIttXqC3DxpM3AAD9WvojzMfVuK6pnxs+e6YTAODboym4dCtflBitnVUnMg899BDOnDmDkydPGh9dunTBM888g5MnT0KhYPsFqpsPt19AmV5A3xZ+6B3pW2l9dHN/
BHs6QVumx8FLt0WIkIjswcErt1FcqkeghxP6tfCvtP6BJj4Y0MofOr2AD3+5IEKE1s+qG/u6u7ujbdu2JstcXV3h4+NTaTlRTf1xQ4OfT9+ETAb8M6Zlldso5DL0be6PtYev4ffLt/Fgc1+oHJg4070pHRVopHnC+JzoXkruuFHq28IPCnnVPSVfjWmJneczsPWPNJxMyUGHEM8GjNL6WXWJDJElrDhwFQAwtH0wWgdXX/XYOtgDPq5KFJXqcPRqdgNFR1Lm6eaE64u+w/VF38HTrWbt+sh+HbuWjcISHbxcHNEmWF3tdi0C3TGyQyMAwMqK6xf9TXKJzJ49e5CYmCh2GCRRucWl2Hz6JgAgtmfYPbeVy2ToE+kHAPgtKRM6jtRKRGaiFwT8dvEWAKB3ZPWlMQbP9gwHAGw5cxOaQvZgupPkEhmi+th0MhVFpTo083dDp1Cv+27fMdQTLkoFNEWluMyGdkRkJlcyC5BdWAonRzk61+BaFNVYjZaB7tCW6Y2Ng6kcExmyK98cKe+OP7prCGQ1GLnXUSFHu0blRb7sik33k5FdANk8GWTzZMjILrj/DmS3TlVcT9o1UkPpcP+fYplMhtFdy8dF+/rwNY5xdQcmMmQ3zqZqcOaGBo4KGR7r1LjG+xka1p29mYuSMr2FoiMie1Gq0+PMDQ0AIKoWDXdHdGwEpYMc59PyjPsTExmyI4a2MQ+1DIC3q7LG+4V6u8DLxRElZXqcS+OUF0RUPxfS8qAt00Pt7IjwO8aNuR9PFyUeaR0A4O/rGTGRITshCAK2/ZEGABjUPqhW+8pkMuNd0ylWLxFRPRmqqaMae9Z6ctpB7cqvX9vOprF6qQITGbILFzPycbliHpN+LfxqvX+Hxp4AgL/S81BcWnlEaSKimtCW6XAhPQ8AEBVSfZfr6kQ394OqYj64czfzzB2eJDGRIbtgKI3pHekLdyfHWu/v7+EEXzcV9EJ5MkNEVBcX0/Oh0wvwdlUi0KP2Yw25qhzwYPPym7FtZ9PMHZ4kMZEhu2BIZB5tE1jnY7QKLJ+P6XwaExkiqhvD9aNVoHuNek5WxXAd++UPJjIAExmyAylZhfjzZi4UchkGVDSUq4uWQeWjAF9Iy+PgeFQlpaMCfjmD4JcziFMUUCV6QcCFig4DhutJXQxoFQAHuQwX0vNwJZPd/JnIkM3bfSEDANA5zKtWvZXuFurtAmdHBYpKdbiWVWiu8MiGeLo5IWPxZmQs3swpCqiS61mFKCjRwclRXqveSndTuziiexNvAMDu8xnmCk+ymMiQzdtzoXwY8L51aOR7J4VchpaG6qWb7IZNRLVzrqJaqXmA+32nJLif6Ip2Mnv+ulXvuKSOiQzZtOJS3d+zyzb3r/fxDMXBbCdDRLV1oeK60TKw7tVKBn1blF/PDl2+bfc9KZnIkE07ejUbRaU6+Lur0CrIvd7Ha+bnBhmAW/laaIo4cRuZysgugOxNV8jedOUUBWQit6gUabnFkAFo7u9W7+NF+rshSO0EbZkeBy/frn+AEsZEhmzanor2MdHN/ercQ+BOzkoFGnk5AwAuZXASSaqCsrD8QXSHpIrrRSMvZ7ioHOp9PJlMZqwu33vBvquXmMiQTdtbUX8cXc/2MXdq5ld+N5XE2bCJqIYM14tmZiiNMTC0k9lr5+1kmMiQzUrNKcLFjHzIZUCfZuZLZJpWXIguZeRziHAiui+9XsDFihKZSP/6V3Eb9GrmCwe5DFcyC3Dttv2WAjKRIZt1oKKRb/vGnlC71H403+qEervAUSFDnrbMeHEiIqrOubRcFGjLoFTIEeLtbLbjujs5omOoJwBg/6VMsx1XapjIkM06kFT+xe7Z1Mesx3VU/D0GxG8X7ffiQUQ187+K60QTP1c4yM37s9urmS8A4Lck+70WMZEhmyQIgrFExvBFN6emFe1k9tvxxYOIauZ/F8vbsJizfYyB4fp2ICkTejsdcZyJDNmkK5kFSMsthlIhR+cwL7Mf33BB
+v3ybZTq9GY/PkmTg0IOdXY01NnRcFDw8krls10fvZoN4O8bIHPqEOIJV6UC2YWl+NNOB+rkN41s0v6K0phOYZ5wssCcN4FqJ7goFSgo0eFUSo7Zj0/S5O3hjJzEPchJ3ANvD/O1hSDpOnEtB9oyPdxUDvB3V5n9+I4KObo3Ka8+t9cSYiYyZJMOVjR869XU/NVKACCXyYx3V/ZcN01E92ao4m7i52qWsayqYqheMtzA2RsmMmRz9HrBOC1Bz2bmbeh7p2ZsJ0NE92G4qWrqa/5qJYPeFYnM4Su3oS2zv+kKmMiQzTmXlovswlK4KhVo39jTYucxjCdz4loO8rVlFjsPSUdGdgHkr/tB/rofpyggFJaU4WRF1XMTv7rPdn0/zQPc4OumQnGpHseTcyx2HmvFRIZsjqE0pluENxwt2ODS21WJUG8XlOkFHL5in0W6VJngnAnBmaV0VD7XW6lOQCNPZ3i7Ki12HplMht7N7LedDBMZsjmGOumeFmofcyfjGA4XmcgQkSnDtahHUx+LtY8xsOfxZJjIkE0p1elx6PLfFw9L61VxF3TAjkfVJKKqGdrHmHtQzqoYEpnT13OgKSq1+PmsCRMZsimnr2tQUKKDp4sjWgd5WPx8D1R0ezyflofb+VqLn4+IpCG3uBRnbmgANMxNVbCnM5r4ukIvlI9vZU+YyJBNMdwB9WjiA7ncskW5AODrpkLLwPJJ4H6/nGXx8xGRNBy+nAW9AET4uiJI3TBjCt05yq89YSJDNuXv9jGWvwMyMNxtsXqJiAzubB/TUOy1nQwTGbIZxaU6HE0uHwq8RwM09DUwNCo+aKeDUdHfHBRyuOR0gUtOF05RYOcONGD7GIMeTXwglwGXbhXgpqaowc4rNn7TyGYcS85GSZkeAR4qNLXgmA136xbhDbkMuJxpXxcPqszbwxkFi4+gYPERTlFgx7IKSnA+LQ/A3+3oGoLaxRHtKsbO2p9kPzdWTGTIZhjGT+jV1NfiXR3vpHZ2RLtGagAslSGivxvbtghwh6+b+edXuhd7HE+GiQzZDMMXt2ezhqtWMjBUZR1gIkNk9wzXooZsH2NwZzsZQRAa/PxiYCJDNkFT9HdXx14WnF+pOoZ68IOXbtvNxYMqy9QUwmFWOBxmhSNTUyh2OCQSQyLTW4Sbqk6hXnBylONWnhYXM/Ib/PxisOpEJiEhAV27doW7uzv8/f0xYsQIXLhwQeywyAr9fvk29ALQpAG7Ot6pS7gXHBUy3MgpwrUs/oDZK71egM49GTr3ZOj1TGjtUUpWIa7eLoRCLkP3Jt4Nfn4nRwW6hpef116ql6w6kdm7dy+mTJmC33//HTt27EBpaSkeeeQRFBRwMjYyZRg3oZcId0AA4KJ0QIcQz/JYWL1EZLcMvZU6hHjC3clRlBgM10F7SWQcxA7gXrZt22byesWKFfD398exY8fw4IMPihQVWaP9FcmDGNVKBj2a+uLI1WwcuHQbY7qFihYHEYnnfxfFvakC/q7S+v1yFkp1eotOnmsNJPXXaTTlbSC8vasvrtNqtcjNzTV5kG1Lzy1GUkY+ZLKG7ep4N7aTIbJver1gLJHtEyleItM6yAOeLo7I15bh9PUc0eJoKJJJZPR6PaZPn45evXqhbdu21W6XkJAAtVptfISEhDRglCQGQ1Fu22A1PF2UosXRMdQTKgc5MvO1SLKTRnZE9LdzabnIKiiBq1JhrGoWg1wuQ6+KnpS/XbT9qm7JJDJTpkzBH3/8gXXr1t1zu/j4eGg0GuMjJSWlgSIksRgGfuopYrUSAKgc/m5kZ29DhBPR321SujfxEb06x57ayUgikZk6dSp+/vln7N69G40bN77ntiqVCh4eHiYPsl2CIPzd0LcBpyWojnEMh4u2f/GgyuRyGVSa1lBpWjfIpKVkXQztY8Todn03QwzHr2WjQFsmcjSWZdWJjCAImDp1Kn744Qfs2rULERERYodEVubq7UKkaoqhVMiNpSFiMtSLH7x8
GyVlepGjoYbmq3ZB8aKzKF50Fr5qF7HDoQZUXKrDkatZAIDeIraPMQj1cUGItzPK9AIOV8Rlq6w6kZkyZQrWrFmDtWvXwt3dHWlpaUhLS0NREeezoXKGYtOOoZ5wVipEjqa8kZ2PqxKFJTqcuJYtdjhE1ECOX8tGcakefu4qRPq7iR0OgL9LZfbbeAmxVScyS5YsgUajQd++fREUFGR8fPPNN2KHRlbC0NBXzK6Od5LLZca7sf/Z+MWDiP5252i+DTnX273cOV2BLbPqREYQhCofcXFxYodGVqBMpzcmC2J2dbxbn0g/AMD/Lt4SORJqaJmaQjjNbAOnmW04RYGd+c2K2scY9KgYjuJ8Wh5u5WlFjsZyrDqRIbqX49dykFdcBi8XR7SvmLreGhiSqtM3NMguKBE5GmpIer0ArfpPaNV/cooCO6IpLMVp41xv1pPI+Lip0DqovMOLofTaFjGRIcnacyEDAPBgcz8orKiHSICHE1oEuEMQgP/ZeJEuEQH7L2VCEICmfq4IVDuJHY4JQ1W3LXfDZiJDkrXnQnnVTd8WfiJHUlnfluUx7T6fIXIkRGRpuyq+5/1a+IscSWV/jydjuyOOM5EhScrILcafN3MhkwEPRlpfItO/4oK2+0IGdKxiILJZer1gvGHp38r6Epmu4V5QKuS4kVOE5Nu22W6LiQxJ0p6/yktj2jdSw8dNJXI0lXUO84KHkwNyCkvZDZvIhp26noPbBSVwVzlYxVhWd3NROqBTmCcA2+29xESGJGnXufI7oGgrLMoFAAeFHH0rYtvF6iUim2UojXmwuZ/o0xJUx9CTylAdb2us810nuofiUh32VpTIPNI6QORoqvdQKyYy9kYul0GRFwZFXhinKLATOw3VSi2t86YKAB5qVX6d/N/FWygssb3pCpjIkOT8djETRaU6BKud0CbYeufSim7uB7msfAyH69m2WTdNpnzVLij78CrKPrzKKQrsQJqmGGdTy9vqWWOnA4OWge4I8XaGtkyPfX/ZXvUSExmSnF/OpgEAHmkTaDUjaFbF00WJLhV15r+cTRc5GiIyN8O1qFOol1W21TOQyWSIaR0IANheEbMtYSJDklKm0+PXc+VJwSNtrLdayWBQ2/KLx9YzN0WOhIjMbUvF93pgxffcmj3SpjzGneczUKqzrQltmciQpBxLzkZ2YSk8XRzRzQp7CNzt0bZBAICjydlI0xSLHA1ZWlZuEVxndIXrjK7IyuXktrbsVp7WOKv0wHZBIkdzf53DvODjqoSmqBRHrtjWbNhMZEhStv5RXizav6U/HKy0h8CdAtVO6BzmBeDvYmiyXWU6PQo9j6LQ8yjKbOyul0z9cjYNggBEhXiikaez2OHcl0Iuw4CKRr+bbayE2Pp/CYgqlOn0+Pl0KgBgaFSwyNHUnKHYeYuNXTyI7NnWP8q/z4MkUK1kMCSqvORoy5mbNlW9xESGJGP/pdvIzC+Bt6vSqmaYvR9DsfPhq1lIz2X1EpHUZeZr8fvl8uqZQRKoVjLo0cQHvm4qZBeW4n8XbWdMGSYyJBk/nrwBABjcLshqB56qSiNPZ3QO84IgABtP3BA7HCKqp40nbkCnFxAV4okQb+l0s3dQyDGkfXni9ePJVJGjMR/p/BqQXSsq0eGXivYxIzpKp1rJ4InOjQEA649dt9mJ24jsxffHy29IDN9rKRnRsREAYPvZdBRobWNwPCYyJAk7zqWjoESHxl7O6BTqJXY4tTa4fRBUDnJczMjHmRsascMhojo6m6rBuZu5UCrkGNZeejdVUY3VCPNxQVGpDtv/tI0OCExkSBK+PnQNAPBYx0ZWPQhedTycHPFoRaPA9ceuixwNWZKsyBeyIum04aLaMXx/H24TALWLo8jR1J5MJsPjncpLkr4+lCJyNObBRIasXlJGPg5evg25DBjdLVTscOrMcPH48WQqikt1IkdDluDv5Qr9e7egf+8W/L1cxQ6HzExbpjO2LXmik/SqlQxGdQ2BQi7D
4atZuJCWJ3Y49cZEhqze2orSmP4tAxAsgfEaqtOrmS8aeTpDU1TKRr9EEvTTqZvIKihBkNoJfSKlW+oW4OGEhyvGlFl7KFnkaOqPiQxZteJSHdYfKy/+fOYB6ZbGAOUDUsX1DAcALN9/lY1+iSREEAQs338FAPBsj3BJDMh5L2MfCAMAbDh+Q/IzYkv7P0E2b/2x68gtLkNjL2c8GGm9s8vW1FNdQ+CiVOBCeh4OXLotdjhkZlm5RfCc3hee0/tyigIbc/hKFs6m5sLJUY4x3ULEDqfeejb1QbiPC/K0ZfjuqLTb7TGRIatVqtNj6d5LAIDnekdAIZdeI9+7qZ0djV02v/rtisjRkLmV6fTQeO2FxmsvpyiwMV9VlMY81qkxPF2UIkdTf3K5DM/1aQIAWLb3EkrKpPt5ZSJDVmvTyVRczy6Cj6sSo7tKu1rpTnE9wyGTlc9C+we7YhNZvXM3c/HL2XQAwPiK6mFb8GTnxvB3VyFVUyzpdntMZMgq6fUCPt+TBAB4rk8EnJUKkSMynyZ+bhheMVfUh9sviBwNEd3Pwu1/ASgfDyoywF3kaMzHyVGBSRWlMp/vSYJOL812e0xkyCqtP3Ydl24VwMPJAeMqGqXZkukDmkMhl2HPhVs4ejVL7HCIqBonrmXj13PpkMuAGQOaix2O2T3dPRReLo64ersQ3xyR5rgyTGTI6uQVl+Jfv5wHAEzt3wzuTtIbdOp+wn1d8VSX8rYyCVvPQy/ROyEiWyYIAt7bWn4teqxTYzTzdxM5IvNzVTngpf6RAMpLiDWFpSJHVHtMZMjqfLIrCZn5JWji64q4nhFih2Mx0x6KhItSgWPJ2fjmqDTvhIhs2fpj13HoShacHOV4+aFIscOxmHE9whDp74asghIk7vxL7HBqjYkMWZU/bmiMYzXMHtIaSgfb/YgGqZ0x8+HyouqELeeQkVcsckRkFiUu5Q+StMx8Ld7dcg5AeVWwlGa5ri1HhRxzhrYGAKw6mIyTKTniBlRLtvsrQZJTWFKGaV+fQKlOwKNtAtGvpb/YIVlcXM9wtGukRm5xGd7YcIZVTBLn7+UK4d0CCO8WcIoCCRMEAf/3wx/IKSxFqyAPPNfbdkuGDfpE+mFoVDB0egHT151AvoRmxmYiQ1ZBEATM2/QnLmcWINDDCQmPtRM7pAbhoJDjvcfbQamQ49dzGVhSMW4OEYnni32Xse1sGhwVMrz/eDs4SnwU35p6Z3hbBKudcPV2Ieb8+IdkRh+3j/8OWb2ley/jm6MpkMmARaOi4OUq/QGnaqpNsBrzhrcBUN7Ybtf5dJEjIrJf+/66hfe3lTfwnTO0Ddo39hQ3oAakdnFE4uiOkMvKpy74bHeS2CHVCBMZEt23R1KMF443B7VCz6bSnYytrsZ0C8XoriEQBODFNcex769bYodEdZCTXwz/GYPhP2MwcvLZ5klqDiRl4vnVR6EXymerH9vddgbirKluEd6YM6S8vcyH2/8yTtprzZjIkGgEQcBnu5Pwz+9PAwBeeLAJJlYMzmSP5g9viwGtAqAt02PSqqPY9sdNsUOiWiop1eGW5xbc8tyCklKd2OFQLez4Mx0TVh5Bcake/Vr4YcFjbSGTSX9alLqI6xWByX2bAgDe+OEMPt550aqrmZjIkCiyC0owbd1JfPBL+ci2z/WOwGuPthQ5KnEpHeT4/JlOxmTmH2uOY8GWc9CW8QeRyFJKyvR4b+t5TFp11JjELB3XGSoH2xlNvC5ejWmBFx4sv7FctOMvTF17AlkFJSJHVTVJJDKfffYZwsPD4eTkhO7du+Pw4cNih0R1VKrT49sjKXh48V78dCoVchkwf3gbzB7SGnIbmBSyvpQOciwZ2wmT+pT3kvhi32U8mvg//PpnulXfERFJjSAI2H0+A49+tM84Oe34XuFYNq6L3ScxACCTyRA/qBXeGdEWCrkMm8/cxMOL9mLd4WtWN8Gkg9gB3M8333yDmTNnYunSpejevTsSExMRExOD
CxcuwN/f9rvn2orr2YX48WQqvj58DdeziwAAkf5u+NcT7dEx1Evk6KyLo0KONwe3RucwL/zfxrO4klmAiauOokWAO8b1CMOjbQPh66YSO0wiSbqdr8UvZ9Ox6uBVnE/LAwD4uikxb1hbDG4fJHJ01mfsA2Fo10iNf64/jQvpeXh9wxl8sisJY7qFYHiHRlYxvo5MsPLbvO7du6Nr16749NNPAQB6vR4hISF46aWX8Prrr993/9zcXKjVamg0Gnh4eFg6XLrDiWvZ2HjiBg5cuo2LGfnG5b5uKjz/YARie4Zb3Z1PbRu2PW3hxoB5xaX4dHcSVh1IRlFFmwuZDIhq7InOYV5oE+yBUG8XhHi7wM9NxVItkWVkFyDg4/Jh7NOn5XMsGREJgoBbeVpcyypESnYhzt7IxbFr2TiVkgPDcE1OjnKMeyAMLz0UCQ8LT4VibdeW2iop02PVwatYtu8ybuVpjcub+buhZ1MfDO/QCJ3DzHtTWtPfb6sukSkpKcGxY8cQHx9vXCaXyzFgwAAcPHhQxMioJs6m5mLlwWQA5T++D0T4YGTHRhjWIRhOjtaVwFgrdydHxA9shcl9m+G7oyn48WQqztzQ4GRKTqXRN1UOcvi4KuHu5Ah3Jwe4OznAUSGHo0IOhVwGB4UMDnIZHBRyOMhlkMtkqKotowxVJ0PVtXusanFt2kje61aqulXV7SNUs0ddbtequ8e7V0z5hUXG1wlb/4Sbi3Od4qrt332vvao9Ry3fw3vvY75zVLequj30goDCEh0KtGUo0JYhX1uGwhIdsgtLUFxadRVI20YeGBYVjFFdQqF2sb253CxB6SDHxD5NMPaBMPx0KhUbT5bfpCZl5CMpIx9N/dzMnsjUlFUnMpmZmdDpdAgICDBZHhAQgPPnz1e5j1arhVb7d7ao0WgAlGd21LA6BqowKsoH3SK80SXM2zg2TElRAUqK7rOzSAoL8mq1fUN9rmQAnoryxVNRvriZU4SjyVk4laLBlcwCpGQXIk1TjCItcL2gQcKhapSVlAIVN47f7L8IByV/JMUklwEBHk4I8XJBhJ8Lohp7oku4N4I9yxNMlBUhN7dhLkbWem2pi5jmasQ0VyOnsARHrmbjyJXb6BikMnvMhuPdr+LIqhOZukhISMC8efMqLQ8JCREhGrJ1k8QOgKzWTTwjdggEIBmAFLuHSO3a8r4Fj52Xlwe1Wl3teqtOZHx9faFQKJCebjrSaXp6OgIDA6vcJz4+HjNnzjS+1uv1yMrKgo+Pj92OCUA1k5ubi5CQEKSkpLA9FRGRyARBQF5eHoKDg++5nVUnMkqlEp07d8bOnTsxYsQIAOWJyc6dOzF16tQq91GpVFCpTHt0eHp6WjhSsiUeHh5MZIiIrMC9SmIMrDqRAYCZM2ciNjYWXbp0Qbdu3ZCYmIiCggKMHz9e7NCIiIhIZFafyIwaNQq3bt3CnDlzkJaWhg4dOmDbtm2VGgATERGR/bH6cWSIGopWq0VCQgLi4+MrVU8SEZF1YiJDREREkiWJuZaIiIiIqsJEhoiIiCSLiQwRERFJFhMZIiIikiwmMkRERCRZTGSIiIhIspjIEBERkWQxkSEiIiLJYiJDREREksVEhoiIiCSLiQwRERFJ1v8DTbTvPurBhtUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The number of common diseases: 7095\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from tdc.multi_pred import GDA\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Load the data\n",
    "data = GDA(name=\"DisGeNET\")\n",
    "data.neg_sample(frac = 1)\n",
    "data.binarize(threshold = 0, order = 'ascending')\n",
    "data.label_distribution()\n",
    "tdc_df = data.get_data()\n",
    "\n",
    "# tdc_df.to_csv('TDC_pn.csv', index=False)\n",
    "# Load CSV file\n",
    "df2 = pd.read_csv('disease_associations.tsv', sep=\"\\t\")\n",
    "\n",
    "# Get the set of disease IDs from both datasets\n",
    "tdc_disease_ids = set(tdc_df['Disease_ID'])\n",
    "df2_disease_ids = set(df2['diseaseId'])\n",
    "\n",
    "# Get the intersection of the two sets, which gives us the common diseases\n",
    "common_diseases = tdc_disease_ids.intersection(df2_disease_ids)\n",
    "\n",
    "print(f\"The number of common diseases: {len(common_diseases)}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ee340c0c-609f-41c2-abe8-334ebb6ad6f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gene_ID</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Disease_ID</th>\n",
       "      <th>Disease</th>\n",
       "      <th>Y</th>\n",
       "      <th>diseaseClass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>C0019209</td>\n",
       "      <td>Hepatomegaly: Abnormal enlargement of the liver.</td>\n",
       "      <td>1</td>\n",
       "      <td>C23;C06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>196</td>\n",
       "      <td>MNSSSANITYASRKRRKPVQKTVKPIPAEGIKSNPSKRHRDRLNTE...</td>\n",
       "      <td>C0019209</td>\n",
       "      <td>Hepatomegaly: Abnormal enlargement of the liver.</td>\n",
       "      <td>1</td>\n",
       "      <td>C23;C06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>213</td>\n",
       "      <td>MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKAL...</td>\n",
       "      <td>C0019209</td>\n",
       "      <td>Hepatomegaly: Abnormal enlargement of the liver.</td>\n",
       "      <td>1</td>\n",
       "      <td>C23;C06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Gene_ID                                               Gene Disease_ID  \\\n",
       "0       1  MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...   C0019209   \n",
       "1     196  MNSSSANITYASRKRRKPVQKTVKPIPAEGIKSNPSKRHRDRLNTE...   C0019209   \n",
       "2     213  MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKAL...   C0019209   \n",
       "\n",
       "                                            Disease  Y diseaseClass  \n",
       "0  Hepatomegaly: Abnormal enlargement of the liver.  1      C23;C06  \n",
       "1  Hepatomegaly: Abnormal enlargement of the liver.  1      C23;C06  \n",
       "2  Hepatomegaly: Abnormal enlargement of the liver.  1      C23;C06  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a new dataframe that only contains the 'diseaseId' and 'diseaseClass' columns from df2\n",
    "df2_reduced = df2[['diseaseId', 'diseaseClass']]\n",
    "df3_reduced = df2[['diseaseId', 'diseaseClass']]\n",
    "\n",
    "# Rename the 'diseaseId' column in df2_reduced to 'Disease_ID' so it can be merged with tdc_df\n",
    "df2_reduced = df2_reduced.rename(columns={'diseaseId': 'Disease_ID'})\n",
    "df3_reduced = df3_reduced.rename(columns={'diseaseId': 'Disease_ID'})\n",
    "\n",
    "# Merge tdc_df with df2_reduced, keeping only the rows where the 'Disease_ID' is present in both dataframes\n",
    "merged_df = pd.merge(tdc_df, df2_reduced, on='Disease_ID')\n",
    "merged_df = pd.merge(tdc_df, df3_reduced, on='Disease_ID')\n",
    "# Now, merged_df contains the 'diseaseClass' for each 'Disease_ID' that is present in both the TDC dataset and your dataset\n",
    "merged_df.head(3)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "16d32215-6393-49c0-8f61-4ec58c74dc94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split merged_df on Disease_ID 'C0002395' (the disease the 'Alzheimer' file names below refer to).\n",
    "# The mask is computed once and reused for both halves.\n",
    "is_target_disease = merged_df['Disease_ID'] == 'C0002395'\n",
    "\n",
    "# Rows where Disease_ID equals 'C0002395'\n",
    "desired_disease_df = merged_df[is_target_disease]\n",
    "\n",
    "# Rows where Disease_ID does not equal 'C0002395'\n",
    "other_diseases_df = merged_df[~is_target_disease]\n",
    "\n",
    "# Saving the two halves to CSV is currently disabled:\n",
    "# desired_disease_df.to_csv('Alzheimer.csv', index=False)\n",
    "# other_diseases_df.to_csv('Non_Alzheimer.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "fc97dce1-4c96-4d7a-92c4-3789a58e8d5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a one-vs-rest table for Disease_ID 'C0002395' and write it to final_Alzheimer.csv.\n",
    "target_disease_id = 'C0002395'\n",
    "is_target_disease = merged_df['Disease_ID'] == target_disease_id\n",
    "\n",
    "# Rows of the target disease; their labels are left untouched.\n",
    "desired_disease_df = merged_df[is_target_disease]\n",
    "if desired_disease_df.empty:\n",
    "    # Fail with a clear message instead of an IndexError on the lookup below.\n",
    "    raise ValueError(f\"Disease_ID {target_disease_id!r} not found in merged_df\")\n",
    "\n",
    "# 'Disease' text of the target, taken from its first row.\n",
    "desired_disease_name = desired_disease_df['Disease'].iloc[0]\n",
    "\n",
    "# Every other row is relabelled as the target disease with Y = 0.\n",
    "# .assign() builds a new frame, so no values are written into a filtered slice of\n",
    "# merged_df (which is what raises SettingWithCopyWarning).\n",
    "# NOTE(review): a gene that also occurs in desired_disease_df will appear again here\n",
    "# with Y = 0 for the same disease -- confirm that this is intended.\n",
    "other_diseases_df = merged_df[~is_target_disease].assign(\n",
    "    Disease_ID=target_disease_id,\n",
    "    Disease=desired_disease_name,\n",
    "    Y=0,\n",
    ")\n",
    "\n",
    "# Target rows first, relabelled rows after.\n",
    "final_df = pd.concat([desired_disease_df, other_diseases_df])\n",
    "\n",
    "# Save the dataframe to a CSV file\n",
    "final_df.to_csv('final_Alzheimer.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "6e2aaa4a-a6fa-4e3b-b778-a6f3d78cd6ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows of merged_df whose 'diseaseClass' string contains 'F01'\n",
    "# (rows with a missing class are treated as non-matching via na=False)\n",
    "F01_df = merged_df.loc[\n",
    "    merged_df['diseaseClass'].str.contains('F01', na=False)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "ae1b4296-cad6-4598-a27c-f82e1e3c3066",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write F01_df to 'F01_diseases.csv', leaving out the index column\n",
    "F01_df.to_csv(path_or_buf='F01_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "c1f190d5-5b3b-474d-803a-7449af6c3fe5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a new dataframe containing only rows where 'diseaseClass' contains 'F03'\n",
    "# (rows with a missing class are excluded via na=False)\n",
    "F03_df = merged_df[merged_df['diseaseClass'].str.contains('F03', na=False)]\n",
    "# Save the DataFrame to a CSV file\n",
    "F03_df.to_csv('F03_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "53a3ed55-1ae9-4682-9fce-65986e38f59c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a new dataframe containing only rows where 'diseaseClass' contains 'C01'\n",
    "# (rows with a missing class are excluded via na=False)\n",
    "C01_df = merged_df[merged_df['diseaseClass'].str.contains('C01', na=False)]\n",
    "# Save the DataFrame to a CSV file\n",
    "C01_df.to_csv('C01_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "105c73dd-de85-4aa0-a8c0-5f37ba46e69c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The list of diseaseClass codes to export: 'C04' through 'C26'\n",
    "codes = ['C{:02d}'.format(i) for i in range(4, 27)]\n",
    "\n",
    "# The class column does not change between iterations, so look it up once.\n",
    "disease_class = merged_df['diseaseClass']\n",
    "\n",
    "# Write one CSV per code\n",
    "for code in codes:\n",
    "    # Rows whose 'diseaseClass' contains the current code (missing classes never match).\n",
    "    # Deliberately not named `df`: that name holds the full DisGeNET association table\n",
    "    # loaded near the top of the notebook and must not be overwritten here.\n",
    "    class_df = merged_df[disease_class.str.contains(code, na=False)]\n",
    "    \n",
    "    # Save the subset as '<code>_diseases.csv', e.g. 'C04_diseases.csv'\n",
    "    class_df.to_csv('{}_diseases.csv'.format(code), index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "8501a545-7685-4310-8266-045a2287591c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a new dataframe containing only rows where 'diseaseClass' contains 'C26'\n",
    "# (rows with a missing class are excluded via na=False)\n",
    "C26_df = merged_df[merged_df['diseaseClass'].str.contains('C26', na=False)]\n",
    "# Save the DataFrame to a CSV file\n",
    "C26_df.to_csv('C26_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "5402a489-e340-4daf-918a-f4771151635c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1481"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For every 'Disease_ID', the number of rows that have a non-missing 'diseaseClass'\n",
    "disease_class_counts = merged_df.groupby('Disease_ID')['diseaseClass'].count()\n",
    "\n",
    "# 'Disease_ID's whose count is zero, i.e. none of their rows carries a class\n",
    "diseases_with_all_nan_class = (\n",
    "    disease_class_counts.loc[disease_class_counts.eq(0)].index.tolist()\n",
    ")\n",
    "\n",
    "len(diseases_with_all_nan_class)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "8f36129d-13ec-4b33-bd06-cdaf652cc90b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows of merged_df that have no 'diseaseClass' value\n",
    "nan_df = merged_df.loc[merged_df['diseaseClass'].isna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "4dcbdbd6-a6e1-4643-b8a9-6df94eea7b3c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAowAAAICCAYAAABFiZx+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACvPElEQVR4nOzdd1QU1/838PfSVIqAgoqoWHcXwd5jixWs2E1U7AZb7C22aGI3Rr/23qImsWKPGlFsWDGiIKKigiAgVXqRef7wt/uILCu7LAPi+3UO5+jM3Ln3zs7MfnbmFokgCAKIiIiIiHKgV9AFICIiIqLCjQEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYSVRt27aFTCbDrVu3CrooSq9fv4ZMJoNMJsu2btasWZDJZFi3bl0BlEy9wlw2XTt27Bj69OmDunXrKj+r169fi5a/q6srZDIZjh49KlqepFt5+QzV3SMo/xXENU/ZGRR0AejL4Orqitu3b2dZZmRkBDMzM1hZWaFmzZpo0qQJnJ2dUaJEiXwvz7t377Bnzx4AwI8//pjv+Ylt9+7diI+PR8+ePVGhQoWCLk6BOnr0KH766ScAQOXKlVGqVCkAQLFixT6bdtasWTh27FiWZYaGhjA1NUWpUqUgl8vRoEEDdO3aFebm5rovPBV6r1+/xrFjx2BmZoahQ4eKnv/H99bvvvsOCxcuVLnd27dv0aJFCwDAxYsXdXZf+DgIXr16NTp37qxyu9OnT2PKlCmwtbWFh4eHVnm9f/8eZ86cgYeHB3x8fBAdHY2MjAxYWFhAJpOhdevW6Nq1KywtLbXaP+UvBoykERsbG9jY2AD4cPHHx8fj5cuXePLkCY4dO4ZFixZh2rRp+P7771Wmr1ixIoyMjPIcVL579w7r168HkPeA0dDQEFWqVMnTPnRt7969CAkJQePGjXP8YrC2tkaVKlWK/M113759AIBp06Zh1KhRWu2jdOnSsLOzAwAIgoD4+Hi8efMGz58/x+nTp7F8+XL88MMPGD16NAwMst8WbWxsUKVKFZiZmWlfESpQOX2GISEhWL9+PWxtbQskYPzY4cOHMXz4cOW5KrY1a9agY8eOKq+BvPLz88PkyZPx8uVLAICpqSkqVKgAQ0NDRERE4OrVq7h69SrWrFmD5cuXo3379jovA+UNA0bSSO/evbMFaOnp6fjvv/+wc+dOeHh4YMGCBQgMDMScOXOypVc8FSxMypYti3/++aegi6GxqVOnYurUqQVdjHz3/PlzAECbNm203kerVq2wbNmyLMsyMzPh5+eHffv24dixY1i3bh38/f2xdu1a6Ollba2zYsUKrfOmwqGwf4b6+vrIyMjAmjVrsHr16gLJ/9WrVzh06FCOP/i1df/+fQwbNgzJyclwdHTElClT0KRJkyyBaWBgIA4dOoQDBw7g8ePHDBgLIbZhpDwzNDREo0aNsGnTJkyZMgXAhydk58+fL+CSUVGQkpICAChevLhO96unpwdHR0csW7YMv/32GyQSCS5cuFAof9RQ0detWzfo6+vj7Nmz8PX1FT1/FxcXAMCGDRuQnJyss/0mJCRgwoQJSE5ORqtWrfDnn3+iefPm2Z5iVq1aFTNnzsSxY8dQrVo1neVPusMnjKRTbm5uuHnzJm7cuIENGzagY8eOWda3bdsWISEh2Lt3L5o0aZJlnZeXF/bt24cHDx4gJiYGxYsXh6WlJWQyGdq0aYM+ffoAyN4u7dOG6EuXLkWvXr2yrLt48SKio6Oxfft23Lt3D9HR0Rg7dix+/PFHvH79Gu3atQMAPHnyJMe6xcXFYd26dfDw8EBERAQsLS3x7bffYvz48Shbtmy27dXVFQBu3bqFwYMHZ2kT9HF7PQAYPHhwljQ9e/ZUPilTHIfx48erfC0fFhaG7du34+rVq3jz5g0MDAxQpUoVODs7Y9CgQSqbBXxc5ooVK2L9+vW4
evUqYmJiUKZMGXTs2BHjx4+HqalpjscpJ4Ig4OzZszh8+DB8fX2RmJiIUqVKoWHDhhg6dChq166dZftPP1fFZ/TpcdCFbt264ebNmzh8+DC2bduGgQMHwsjISLle0c7s43NLwdfXFzt37oS3tzfevn0LQ0NDWFpaokqVKmjevDmGDRsGiUSSJc379+/h7u6OEydOwN/fH4mJibC0tETjxo0xatQoyOXybGWMjo7GxYsX4enpiadPnyIiIgKZmZmwsbFB8+bNMXz4cNja2qqsn1hlzMzMxJEjR3D8+HEEBAQgMTERpqamKF26NOrUqQMXFxc0bdr0s59HcnIyGjVqhIyMDFy/fh2lS5dWrhMEAc2aNUNMTAzkcjmOHz+eJa2Pjw/69u0La2trXLt2Tblc1Wf4cfvBkJCQbOdcTtfu3bt3sXXrVjx48ADJycmws7ND//79MXDgwGzHMbeqVasGFxcXHD16FL///jt27NiR67SJiYnKc+Px48eIiIhAamoqypQpg8aNG2P48OGoUaOG2n307t0b3t7eePnyJfbu3Qs3Nzet6vGpAwcOICIiAmZmZlixYkWW60qVqlWromrVqrnad17q/erVK2zfvh03b95EWFgY9PT0YGlpiYoVK+Kbb77BkCFDYGxsrNw+ISEBO3fuxMWLFxEUFIT09HRYWFgo8/r+++8LrCmBWBgwks65urrixo0b8Pf3R2hoKMqXL//ZNIcOHcLcuXMBACVLlkT16tUhCALCwsLw77//4uHDh8qAsXLlynB0dMSjR48AAPXr18+yr4+/XBTOnz+PVatWwcjICFWqVIGpqalGN/a4uDj07dsXQUFBqFatGqpVq4anT5/i4MGDuHjxIv744w+d/CouXbo06tevj0ePHiEtLQ1SqTRLcFa5cuVc7efu3bsYM2YM3r17B0NDQ9SoUQPJycl49OgRHj16hBMnTmDnzp2wtrZWmf7JkycYP348UlJSUKNGDRgaGiI0NBS7du3C/fv3sX//fo3aOb1//x7Tpk3DmTNnAADlypVDhQoV8OrVK5w+fRpnzpzBnDlz4Orqqkyj+Fy9vb0BAI6Ojsovm9weB00MHjwYhw8fRlRUFB48eIBGjRp9Ns2VK1cwduxYpKenw9jYGFWqVIGBgQHCwsJw7do1XLt2DYMHD85yrOLi4jB27FjcvXsXwIe2qDVq1EBQUBBOnTqFc+fOYfny5ejSpUuWvM6cOYNff/0VhoaGsLKyQtWqVZGYmIjQ0FDs27cPJ06cwK5du+Do6FhgZZw5cyZOnDgBAChTpgwqVqyIhIQEZXvR9PT0XAWMJUqUQJ06dXD37l3cunUrS0eMJ0+eICYmRvnv6OhoZUcoALh58yYA5CofqVSK2NhYBAQEwMjIKNuxU9Vm9ejRo5gzZw5KliyJChUqIDQ0FAEBAfj1118REhKCmTNnfjbfnEyYMAGnT5/GtWvXcPPmzVzVAQBu376N6dOnQ19fH6VLl0alSpWQmpqK0NBQHD16FKdPn8batWvx7bff5rgPAwMDTJo0CZMmTcK2bdvQv39/WFhYaF0XBUVA3717d523t9a23o8fP8bAgQORmJiIYsWKoVKlSihWrBgiIiJw9+5d3L59G507d1YGgAkJCejXrx+eP38OiUSCSpUqoWTJkoiOjkZAQAB8fX1RrVq1Ih8wQiDKhUGDBglSqVRYu3btZ7eNi4sTZDKZIJVKhdOnT2dZ16ZNG0EqlQo3b95ULsvIyBAaN24sSKVSYe/evUJ6enqWNM+ePRP27NmTZVlwcLAglUoFqVSqtiyKbezt7YVly5YJKSkpynXJycmf3dfMmTMFqVQqODg4CB06dBACAgKU60JDQ4U+ffoIUqlU6Natm5CRkfHZun7s5s2bglQqFdq0aZNt3efSfly2Tz+T6OhooVmzZoJUKhVGjRolREVFKdc9fvxYaNu2rSCVSoUhQ4bkmK+Dg4Mwffp04d27d8p1N27cEOrUqSNIpVLh
8OHDOZZLlQ0bNghSqVSoXbu28M8//yiXp6amCr/99psglUoFuVwu3L59O1taxWcTHBysUZ6C8P+P0cyZMz+7bWZmptCoUSNBKpUKW7ZsybJOcf4fOXIky/Lu3bsLUqlUWLFihfJ8UggJCRG2bt0qvH//PsvykSNHClKpVOjTp4/g6+urXP7+/Xth9+7dglwuF2rVqiUEBgZmSffgwQPh8uXLWc5hQRCE+Ph44ffffxekUqnQuXNnITMzs0DK6OfnJ0ilUqF+/frZztvMzEzh9u3b2e4H6qxdu1aQSqXC3LlzsyzftWuXIJVKhVatWglSqVQ4c+ZMlvXDhw8XpFKpcOjQoSzLc/oM1V2HCh/fIxwdHYVdu3Zlud43b94sSKVSQSaTCa9evcp1HT8ul+KcW7p0qfLYfywiIiLHa+H58+fC2bNnhfj4+CzLU1JShL179wr29vZCkyZNhKSkpGz5K/Z5//59ITMzU+jVq5cglUqF5cuXZ9nu1KlTnz1On4qJiVHu/9y5c7lOl1MZdVXvMWPGCFKpVJg2bVq2tFFRUcK+ffuEyMhI5TLFOde1a9dsZUhJSRHOnDkjeHt7a12/LwXbMJLOlSxZUvlULDIy8rPbR0dHIzY2FiVLloSrq2u2J1fVqlXL9mpWU82aNcPMmTOzDMWiSZu49PR0LF26NMvrDRsbG6xZswYGBgZ48uQJLl68mKcy6spff/2FqKgoWFhYYPXq1VmevsjlcmXjfy8vL9y7d0/lPipWrIjFixdnecLSrFkz5VPeS5cu5bo8SUlJ2LlzJwBg3LhxcHJyUq4zMjLC1KlT8c033yAzMxMbN27MfUV1TCKRKEcAiIqKylWawMBAAMDo0aOznU/ly5fHqFGjsnSguXHjBq5cuQIrKyts2bIFNWvWVK7T09PDkCFDMHDgQKSmpmZrS1m7dm20bt0623BCpqammDx5MurVq4dnz57h4cOHBVJGRT5NmzbN9hpXIpGgUaNGOQ7Zoori6ZriiaGC4v9jx47Ntj49PV35RDq3T+c01b17dwwdOhT6+vrKZW5ubpBKpRAEAZcvX87T/t3c3GBqagofHx+cO3cuV2mqVq0KZ2fnbE1FihUrBldXV3Tq1AkxMTGfvW4lEomyI92+ffsQHh6uXSX+z8fpK1asmKd9qaJtvRXn6ogRI7KlLVWqFAYOHJjlTZVi+z59+mQbtaJYsWLo1KkT6tWrp7N6FVYMGClfKNp+JCYmfnbb0qVLo3jx4nj37h08PT3zpTy9e/fOU/patWqhQYMG2Zbb2toqe/Pl9YtCVxTl+O6772BiYpJtfYMGDZQ3t5yOd//+/WFoaJhted26dQF8aP+TW/fu3UN8fDyKFSuGgQMHqtxmxIgRAD68YtJlg3tNaXLeAlA2tzh58mSutle8kndycsoSyH+sQ4cOAD4E9J9KS0vDmTNn8PPPP2PkyJEYOHAgvv/+e3z//fcICgoCgGwdJsQqoyKfBw8eIDg4OFd5qVOnTh2UKFECQUFBCAkJAfChacOdO3dQtmxZ9OzZE8WLF88SMD548ABJSUmoUKFCvo1fmtM5rLimFJ+DtiwtLZXXw5o1a/D+/ftcpcvMzMTly5exaNEiuLm5YdCgQcpzQ/HD0M/P77P7+eabb9CsWTOkpqbmeVKAhIQE5b8/bg+oS9rUW9HW9/Tp08jMzPxsHortL126lOt7Q1HENoyULxQXVW46R+jp6WH48OHYuHEjfvjhB0ilUjRr1gx169ZFo0aNcmxnp4nPNfjOS/oaNWrgn3/+Uf4KLWgvXrwAkL3TyMekUinu37+fY5lzaiOo+NWtyU1TUR5bW1uVAayiPACQkZGBV69eqexQIQZNzlsAGDVqFObMmYOFCxdi165d+Oabb1CvXj00atRIZQcUf39/AB/aFeY0dElqaiqAD52WPvbixQuMHj1aOY5dTmJjYwukjHXr1kXjxo1x+/ZtODk5oUGDBmjUqBHq1q2LBg0a5PjZ58TI
yAj169fH9evX4eXlhT59+uDRo0dISEhAu3btYGRkhHr16sHLywthYWEoV66cRu0XtaXLayMnQ4cOxf79+xEYGIijR4+ib9++arePiorC6NGj4ePjo3a7T8+NnEydOhV9+vTB0aNHMXz48Fx3QvnUx595UlKSVvtQR9t6jxgxAjdu3MDWrVvh7u6OFi1aoF69emjQoIHKtui9e/fGrl274OXlhRYtWqB58+aoX78+6tevj1q1amV52lyUMWAknYuNjVX+slTVAUWVCRMmoHz58ti3bx/8/f0REBCAPXv2QCKRKF8n5yWIyOtA4VZWVjmu0+UXhS4ojr26MiuC8JzKnNPx+nR8Ql2Vx8rKChKJBIIgFNhxFAQBb968AZD787ZPnz4wNzfHjh078ODBA/z111/466+/AHx4QjZ16tQsr2ffvXsHAAgODv7sUzjFcELAh6coEyZMwMuXL2Fvb4/x48ejVq1asLS0VHYEmjFjBo4fP46MjIwCKaNEIsHmzZuVX8K3b99W9kAuXrw4unTpgmnTpuX41FKVpk2b4vr167h58yb69OmjDAibNWumXO/l5YWbN2+iR48e2dbnh5yelCmuDUEQdJLH2LFj8csvv2D9+vXo3r272u1/+ukn+Pj4wNbWVtk8wdraWtl84X//+x82btyY7dzISa1ateDk5IRz585hzZo1WLt2rVb1KFeunPLfwcHBsLe312o/OdG23t988w327NmDzZs34/bt2zh69Khyysjq1atjwoQJWZrOWFlZ4dChQ9iwYQMuXLig/AM+vMIeMmQIRo4cmS8DnhcmRbt2VCAUPSsB5Lpdh0QiQd++fdG3b19ER0fD29sbt2/fxpkzZ3Djxg0MGTIEJ06cUDl8jRjUtcVUtHfL6QlKTl8g+fXq1dTUFHFxcWrL/PbtWwA5l1nX5QHUH8PIyEjlcRKjTKo8efJEGSxp0h6pQ4cO6NChA+Lj4+Ht7Y27d+/in3/+wYMHDzBixAgcOnRI+UWpCDYWLVr02adGH3v06BECAgJQrFgx7Ny5U2XQpe7pkRhlBD58dpMnT8bkyZPx6tUreHt748aNG7hw4QKOHDmCwMBA7N+/P9dPZBSBrCIQVMxBr3iC+HE7R2dnZzx48CBLui9Zv379sGvXLgQHB2Pfvn05Bo2RkZG4cuUKAGDz5s3Kp/Ufy+2TxY9NnjwZFy9exLlz5z77BC8nFhYWqF69Op49e4abN29mG2YtL/Ja78aNG6Nx48ZITk7Gf//9h3v37uH8+fN48uQJJkyYgK1bt6J169bK7StWrIhly5Zh8eLFePz4Mby9veHp6Ynr169j9erViI+Px/Tp03VWv8KIbRhJ5/744w8AQM2aNZWdCDRRqlQptG/fHrNnz8Y///yDChUqIDY2FqdPn1Zuo+1YZ9p69uxZjuuePn0KANle2yi+eHPqQPG5V4vaUpQjICAgx20U68QYIFdRnpCQkByfHirKY2BgUGBDU+zduxfAh6cJn44JmRtmZmZo3bo1pk6dirNnz6Ju3bpIT0/H4cOHldsovtQUHTNyS9Eurnr16iqDxYyMjGydXcQu46fs7OzQs2dPrFy5En///TckEgnu37+Px48f53ofjo6OMDU1xdu3b5Vf0pUrV1beV2rVqgVTU1N4eXnB29sbaWlpqF69ukbNWMS+l+SWoaEhJk6cCADYunUr4uPjVW4XHBwMQRBgbm6uMmgCPsy0oqkqVaoox6tctWqVxukVFIHuiRMnlMMh6YKu6l2iRAk0a9YM48ePx/Hjx5VPFv/880+V2+vr68PR0RGDBw/Gjh07MG/ePOX2uni6XJgxYCSd2rJlS7ZejHlhamqqbIv3cY+7j1+ZfvxaLL/4+PiovPmEhoYqe0d/OtaXIvBRlS4jIwMHDx7MMT9F/bSpm+JX8d9//63yKeb9+/eVZfr4F3R+qV+/PszMzJCamprjTXjXrl0APjwZymvzAW2cPHkSR44cAfChzd/nBhf+HAMD
A2XQ+fF526lTJwAfGttr0nFI0bv57du3Kr+Ujh49iujo6AItozoymUzZ416Tnrf6+vrK8TA3b96M5OTkLE8P9fX10bBhQ4SFhSlfs2vaflFxbMW4j2iqa9eukMvliI2Nxfbt21Vuo7heEhMTVbYTvH79ukZB+sfGjx+v7Fh0/fp1rfYxYMAAWFtbIz4+HjNnzkRaWpra7V+8eIGzZ89+dr/5UW+JRKIc/zW356li+8TExELTLCm/MGCkPMvIyFAOFP37778DAIYMGaLsSfk5z549w+zZs3H37t1sPdYUDd6BD08TFCwtLZVfQJ8Ou5EfDA0NMWvWLOW8xsCHBv+TJ09Geno6pFIp2rZtmyWN4v9HjhzJUsaEhATMmzdPbW/KSpUqAdCubt999x1Kly6N6OhoTJ48Ocuv+idPnmDGjBkAPrTj+XTQ8/xgbGyM4cOHAwDWr1+Pf//9V7kuLS0Nq1evxrVr16Cnp4cxY8bke3kUMjMz8ejRI/z000+YNm0agA+vbnM7hJNiyrOrV69m+xJ89OiR8kvv4/O2TZs2aNGiBVJTUzFs2DBcvXo1235fv36N7du349ChQ8pl9evXh6GhISIiIrB69WplmyxBEHDy5EksWrQo23A7Ypfx+PHj+N///pflGgE+DHWzfft2vHv3Dvr6+lmG6ckNRQCoGGLm0/aJivWKqUg1DRgrVaoEiUSCqKgo5duCwkIikSinW3V3d1e5TfXq1WFpaYmMjAz88ssvWX4kXrt2DVOmTFF5buRG2bJlMWjQILX5f46ZmRnWrFmDYsWKwdPTEwMGDMCNGzey9f4OCgrCypUr0bNnT7VvdBTyUu8JEybg/Pnz2X5QBwUFKX/If3xNrFq1CgcOHMjWrObdu3fYsmULgA+dobSZAetLwjaMpJEjR47gxo0bAD584cbHxyMkJET569zMzAxTp07VaPL69PR0HDlyBEeOHEGJEiVQqVIlGBkZITw8HBEREQA+TAv38RhuEokELi4u2LdvH8aMGYMaNWrA3NwcwIcnRK1atdJVlQF8CMKuXLmCLl26oHr16jAwMMDTp0+RkZEBS0tL/P7779kaPLu4uODvv//GgwcPMHToUNja2sLc3BzPnj1DsWLFMGPGDCxevFhlfj169ICHhwd27tyJCxcuoGzZstDT00PLli3xww8/qC2rpaUl1q5di9GjR+PSpUto1aoVatSogZSUFOWXuVQqVY7HKAY3NzcEBATg7NmzGDduHGxsbGBlZYVXr17h3bt3kEgk+Omnn3I1u4o2Pu7xKwgCEhISEBoaqnwiUKxYMfzwww8YPXp0rjv2ZGZm4ty5czh37hwMDQ1hZ2cHY2NjREVFKYeBqVOnTrYAdPXq1Zg4cSJu3LiBkSNHwtLSEhUqVEBmZibCwsKUTRjGjx+vTFOqVCmMHj0a69atw5YtW/D333+jQoUKCA8Px9u3b9GyZUtYWloqZ1kpiDLGxMRg48aN2LhxIywsLGBrawtBEPD69Wtl29Bp06Zp3ExFEQAKggCJRJKtfeLH6/X09NC4cWON9m9hYYFvv/0Wly5dQs+ePSGVSpXtaGfPnq3zjhqaat26NRo1aoQ7d+6oXG9gYIAZM2bgp59+wrFjx3DhwgXY2dkhJiYGoaGhsLe3R7NmzZRjoWrqhx9+wMGDB5WfoTYaNmyIAwcOYMqUKXj48CGGDRsGU1NT2NraKn8IKe71ZmZmcHBw+Ow+81LvGzdu4Ny5czAwMEDFihVRsmRJxMXF4dWrVxAEAZUrV8aECROU2z9//hxbt27FwoULUb58eVhZWSE5ORmvXr1CWloajI2NsWjRIq2Pz5eCASNp5M2bN8qepIaGhjAzM4OdnR3s7e3RtGlTODs7a/xKsXLlyli8eDG8vLzg5+eHsLAwJCYmwszMDN988w1cXFzQvXv3bF/kM2bMgKmpKc6fP4+XL18qh/ro2bOnbir7EXNzcxw6dCjbXNKtW7fGjz/+mKU3oIKBgQF2
7tyJDRs24Ny5cwgPD0dqaio6deqEH3/8UfmFrYqTkxOWLFmCv//+G8+ePcPr168hCEKOcwV/qmHDhjh16hS2b9+OK1eu4OnTpzAwMICDgwOcnZ3h6uoq6qtffX19rF69Gh06dMChQ4fg5+cHf39/WFhYoHPnzhg6dCjq1KmTb/lHRUUpgxxDQ0OYmJigXLlykMvlaNCgAbp27ar8wZFbJiYm+O2333Dz5k34+PggIiIC8fHxMDU1RcOGDdGpUyf069cv2+vtkiVLYseOHTh//jxOnDgBHx8f+Pv7w8TEBGXKlEGzZs3Qtm3bbM0Fxo8fD1tbW/zxxx94+vQpAgMDUblyZYwYMQKDBw/GnDlzCrSMTk5OyMzMxK1bt/Ds2TO8ePEC6enpsLKyQosWLTBw4EA0bNhQo2MMfHidbWlpiZiYGEil0mxtOOVyuXK9vb29xp8jACxfvhxr166Fp6cnAgICkJ6eDgB5CpJ0aerUqfjuu+9yXN+rVy9YWlpi27Zt8PPzQ2BgIGxtbTFu3Dj88MMP2LZtm9Z5m5ubY9SoUXlqxwh8aI965swZnD59Gh4eHnj48CGCgoKUP7pbtWqF1q1bo1u3brn+DLWt9/Lly3H9+nXcv38f4eHhCA4ORvHixeHo6Ij27dvD1dU1S+e7sWPHQiqV4vbt2wgJCcHjx4+hr68PW1tb5Vzs+TXuZ2EiEYp6K00iIiIiyhO2YSQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESkFntJ56OaNWsiMzOzyI/NRERERF++hIQE6Onpwc/PL9s6PmHMR5mZmUV+qiAiIiIqGgRByDaBhgKfMOYjxZPFu3fvFnBJiIiIiNRTN1YqnzASERERkVoMGImIiIhILQaMRERERKQWA0YiIiIiUosBIxERERGpxYCRiIiIiNRiwEhEREREajFgJCIiIiK1GDASERERkVoMGImIiIhILQaMRERERKQWA0YiIiIiUosBIxERERGpZVDQBfiaJSSn40VInEZpqtiaw7SEYT6ViIiIiCg7BowF6EVIHGZvuq5RmiVjmqNWdat8KhERERFRdnwlTURERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtBoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtBoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqGRR0AVTx9fXFjRs38OjRIzx8+BAhISEAgDNnzqBatWrZto+MjISnpyc8PT3x9OlTvHnzBhKJBFWrVoWTkxMGDx6M4sWL55jf2bNnsXfvXjx58gQAIJfLMWTIEDg5OeVPBYmIiIi+IIUyYNywYQMuXryY6+2XLVuGkydPQk9PDzKZDK1atUJMTAwePnyIVatW4eTJk9izZw9KlSqVLe2aNWuwadMmGBoaomnTpgCAmzdvYsKECfjxxx8xfvx4ndWLiIiI6EtUKAPGunXrQiqVwtHREY6OjhgwYIDyKaMqJUuWxI8//oh+/fqhTJkyyuWhoaEYPXo0njx5giVLluC3337Lku7evXvYtGkTzMzMcODAAUilUgBAQEAABgwYgHXr1qFly5aoU6dO/lSUiIiI6AtQKAPGH374QaPt582bB4lEkm15+fLl8fPPP2PAgAE4f/480tLSYGRkpFy/fft2AICbm5syWAQAqVSKH374AatWrcK2bduwfv16LWtCRERE9OUrEp1eVAWLCjVr1gQApKamIjY2Vrk8LS0N169fBwB07tw5W7ouXboAAK5evYq0tDQdlpaIiIjoy1IkAkZ1Xr16BQAwNDSEhYWFcnlgYCBSU1NhYWEBW1vbbOlsbW1hYWGBlJQUvHz5UqTSEhERERU+RT5g3LVrFwCgVatWWV5Hh4aGAgBsbGxyTFuuXDkAUNt+koiIiKioK9IB49mzZ+Hu7o7ixYtj6tSpWdYl
JiYCAEqUKJFjemNj4yzbEhEREX2NimzA6OPjg9mzZwMA5s+fr3L8RkB9+0dBEPKlbERERERfkiIZMD59+hSjRo1CUlISJk+ejN69e2fbxsTEBACQlJSU436Sk5OzbEtERET0NSpyAePLly8xbNgwxMbGws3NDaNHj1a5Xfny5QEAYWFhOe5LsU6xLREREdHXqEgFjK9fv8bQoUPx9u1bDB48GFOmTMlx26pVq6JYsWKIiYlRdoD5WGhoKGJjY1G8eHFUqVIlP4tNREREVKgVmYAxPDwcQ4cOxZs3b9C/f3/MmTNH7fZGRkZo3rw5gA+dYz515swZAEDLli2z9K4mIiIi+toUiYAxKioKQ4YMQXBwMHr06IGFCxfmKt2IESMAAJs3b8bTp0+Vy58+fYotW7Zk2YaIiIjoa1Uopwa8fPkyNm7cqPx/REQEAGDKlCkoVqwYAKB169YYN24cgA9TA7548QIGBgZIT0/H9OnTVe539uzZKFWqlPL/DRs2hJubG7Zs2YKePXuiWbNmAAAvLy+kp6dj7NixqFevXr7UkYiIiOhLUSgDxujoaDx48CDbcn9/f+W/q1atqvx3fHw8ACAjIwOnT5/Ocb+TJk3KEjACH4JQuVyOvXv34u7duwAAR0dHDBkyBJ06dcpTPYiIiIiKgkIZMPbq1Qu9evXK9fZ//PFHnvLr3LmzyvmkiYiIiKiItGEkIiIiovzDgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtBoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtBoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtBoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtg4IugCq+vr64ceMGHj16hIcPHyIkJAQAcObMGVSrVi3HdMHBwVi7di28vLwQFxeHcuXKwdnZGWPGjIGxsXGO6c6ePYu9e/fiyZMnAAC5XI4hQ4bAyclJtxUjIiIi+gIVyoBxw4YNuHjxokZp/Pz8MGjQICQmJkImk6FevXrw8fHB1q1b4enpiQMHDsDU1DRbujVr1mDTpk0wNDRE06ZNAQA3b97EhAkT8OOPP2L8+PE6qRMRERHRl6pQBox169aFVCqFo6MjHB0dMWDAAOVTRlXev3+PqVOnIjExERMnTsTYsWMBAGlpaRg/fjw8PT2xcuVKLFy4MEu6e/fuYdOmTTAzM8OBAwcglUoBAAEBARgwYADWrVuHli1bok6dOvlXWSIiIqJCrlC2Yfzhhx8wadIktG/fHuXKlfvs9h4eHggMDET16tUxZswY5XIjIyP8+uuvMDAwwJEjRxATE5Ml3fbt2wEAbm5uymARAKRSKX744QcAwLZt23RRJSIiIqIvVqEMGDXl4eEBAOjUqRMkEkmWdWXLlkX9+vWRnp4OT09P5fK0tDRcv34dANC5c+ds++zSpQsA4OrVq0hLS8uvohMREREVekUiYPT39wcAODg4qFzv6OgIAMpOLQAQGBiI1NRUWFhYwNbWNlsaW1tbWFhYICUlBS9fvtR9oYmIiIi+EEUiYAwNDQUA2NjYqFxftmxZAMjSDvJzaQAoX4eraz9JREREVNQViYAxMTERAFCiRAmV601MTLJsl5s0AJRD8XycjoiIiOhrUyQCRoVP2y8qCIKgcZrPpSMiIiL6WhSJgFHxBDEpKUnlesVyxXa5SQMAycnJ2dIRERERfW2KRMCoaIcYFhamcn14eDgAoHz58splin/nlObjdR+nIyIiIvraFImA0d7eHgDw6NEjlet9fX0BfJjyT6Fq1aooVqwYYmJilB1gPhYaGorY2FgUL14cVapUyYdSExER
EX0ZikTA2LZtWwDAuXPnsrU7jIiIwL1792BgYIDWrVsrlxsZGaF58+YAPswl/akzZ84AAFq2bAkjI6P8KjoRERFRoVdkAsbKlSsjICAAW7ZsUS5PS0vD/PnzkZGRgd69e8PS0jJLuhEjRgAANm/ejKdPnyqXP336VLkfxTZEREREX6tCOZf05cuXsXHjRuX/IyIiAABTpkxBsWLFAACtW7fGuHHjAAD6+vpYtWoVXF1dsXr1apw9exZ2dnZ48OABwsLCIJVKMX369Gz5NGzYEG5ubtiyZQt69uyJZs2aAQC8vLyQnp6OsWPHol69evldXSIiIqJCrVAGjNHR0Xjw4EG25YoZXYAPbRA/5ujoCHd3d6xbtw5eXl54/vw5ypUrh5EjR2Ls2LE59nSeMmUK5HI59u7di7t37yr3NWTIEHTq1EmHtSIiIiL6MhXKgLFXr17o1auXxuns7Ozw22+/aZyuc+fOKueTJiIiIqIi0oaRiIiIiPIPA0YiIiIiUosBIxERERGpxYCRiIiIiNRiwEhEREREajFgJCIiIiK1GDASERERkVoMGImIiIhILQaMRERERKSWVgFjeHi4rstBRERERIWUVgFju3btMHbsWFy+fBmZmZm6LhMRERERFSJazSWdkZEBDw8PXLp0CdbW1ujTpw969+4NW1tbXZePiIiIiAqYVk8YL1y4gB9++AHW1taIiIjApk2b0KFDB4wcORLnz59HRkaGrstJRERERAVEq4CxYsWKmDJlCi5fvoyNGzeidevW0NPTw7Vr1zBx4kS0bt0av/32G16+fKnj4hIRERGR2PLUS1pPTw9t27bF5s2bcenSJUyePBmVKlVCVFQUtm/fjk6dOsHV1RWnTp1CWlqarspMRERERCLS2bA61tbWcHNzw7lz57Bnzx507doV+vr6uHv3LqZPn46WLVti6dKlCAoK0lWWRERERCQCnY/DmJiYiFevXuHVq1d4//49BEGAIAiIi4vDnj170LlzZyxatIjtHImIiIi+EFr1klbl/v37OHToEM6ePYuUlBQIgoDSpUujV69e6NevHyIjI/HXX3/h9OnT2L9/P0xMTDB58mRdZU9ERERE+SRPAWNsbCzc3d1x+PBhPH/+HIIgQCKRoEmTJujfvz86dOgAA4MPWVSsWBH16tXDwIEDMWDAAJw4cYIBIxEREdEXQKuA0cvLCwcPHsTFixeRnp4OQRBgaWmJnj17on///rCzs8sxbe3atVGzZk08evRI60ITERERkXi0ChiHDRum/HejRo3w3XffoWPHjjA0NMxV+mLFinGGGCIiIqIvhFYBo7m5ufJpYpUqVTRO/8cff2iTLREREREVAK0CxqtXr8LIyEjXZSEiIiKiQkirYXUYLBIRERF9PbQKGP39/fHTTz/hxIkTarc7ceIEfvrpJwQEBGhVOCIiIiIqeFoFjIcPH4a7uzusra3VbmdtbY1jx47h6NGjWhWOiIiIiAqeVgHjrVu3UKJECTRr1kztds2aNUOJEiXg5eWlVeGIiIiIqOBpFTCGhYXB1tY2V9tWqFABYWFh2mRDRERERIWAVgFjWlparsdcNDQ0RHJysjbZEBEREVEhoFXAWKZMGQQGBiI1NVXtdqmpqQgMDISVlZVWhSMiIiKigqdVwNiwYUOkpqZi165darfbvXs3UlJS0KhRI60KR0REREQFT6uBuwcPHgx3d3esW7cOGRkZGDZsGExMTJTrk5KSsGvXLmzYsAF6enpwdXXVWYHVCQwMxLZt23Dr1i1ERETAwMAAlSpVQvv27TF8+HCYmppmSxMcHIy1a9fCy8sLcXFxKFeuHJydnTFmzBgYGxuLUm4iIiKiwkwiCIKgTcJNmzbhf//7HyQSCYyMjFCjRg2ULFkS7969w9OnT5GWlgZBEDBp0iSMHj1a1+XO5u7duxgxYgRSUlJQsWJFyGQyJCcn4/79+0hKSkKVKlXw119/wcLCQpnGz88PgwYNQmJiImQyGezs
7ODj44OwsDDIZDIcOHBAZZCZWw0bNlSWTZWHzyIxe9N1jfa5ZExz1KrOV/xERESkW+riFq2eMALAmDFjULZsWaxevRpv377Fo0ePsqwvU6YMpkyZgh49emibhUYWLFiAlJQUuLm5YdKkSdDT+/C2PTo6GsOHD8fjx4+xfft2TJs2DQDw/v17TJ06FYmJiZg4cSLGjh0L4EOHnvHjx8PT0xMrV67EwoULRSk/ERERUWGldcAIAL169UK3bt1w//59BAQEICEhAaamppDJZKhXrx4MDPK0+1yLiYnB06dPYWhoiPHjxyuDRQAoVaoURo4cialTp8LHx0e53MPDA4GBgahevTrGjBmjXG5kZIRff/0Vbdu2xZEjRzBp0iRYWlqKUg8iIiKiwijPEZ2hoSEaN26Mxo0b66I8WpdBHcVb948DPw8PDwBAp06dIJFIsmxftmxZ1K9fH7dv34anp6doT0mJiIiICiOtekkXNqampqhbty7S09Oxfv16ZGZmKtdFR0dj+/btAIDevXsrl/v7+wMAHBwcVO7T0dERAPDkyZP8KjYRERHRFyHPTxgTEhIQHByMxMREqOs/k99D6yxatAijRo3Cli1bcObMGchkMqSkpMDb2xumpqZYvnw5WrVqpdw+NDQUAGBjY6Nyf2XLlgUAhISE5Gu5iYiIiAo7rQPGR48eYfny5bh3757aQBEAJBIJ/Pz8tM0qV2rUqIG//voLEyZMwIMHDxAcHKxc16xZM1SrVi3L9omJiQCAEiVKqNyfYpggxXZEREREXyutAkZfX1+4uroiJSUFgiDAyMgIpUuXztYWUEw3b97EhAkTUKZMGezYsQN16tRBcnIyPD09sWLFCly7dg0bNmxAy5Yts6TLqcxajjZEREREVORoFTCuW7cOycnJqF+/PubOnYuaNWvqulwaiYuLw8SJE5Geno5t27YpXzObmZmhb9++MDMzw8SJE7FgwQKcP38e+vr6MDExQWxsLJKSklTuU7H84wHJiYiIiL5GWnV68fb2RrFixbBx48YCDxYB4PLly4iNjUXdunVVtkns0KEDDA0N8fr1a+WrasV2YWFhKvcZHh4OAChfvnw+lZqIiIjoy6BVwJiamoqqVatmmTWlICmCvpyeBurr6yun+YuLiwMA2NvbA0C2AccVfH19AQByuVynZSUiIiL60mgVMNrZ2eX4KrcgWFtbA/gw1V9GRka29YGBgcpA0dbWFgDQtm1bAMC5c+eytVeMiIjAvXv3YGBggNatW+dn0YmIiIgKPa0Cxl69eiEoKAiPHz/WdXm00qpVKxQvXhwhISFYtmwZ0tPTleuioqIwZ84cAEDjxo1hZfVhHua2bduicuXKCAgIwJYtW5Tbp6WlYf78+cjIyEDv3r05ywsRERF99SSCFt2BMzMzMWrUKLx69QorVqxA/fr186NsGjl06BDmz5+PzMxMlC1bFjVr1kRKSgoePnyIhIQEWFlZYd++fahSpYoyzaNHj+Dq6oqkpCTI5XLY2dnhwYMHCAsLg1QqxYEDB2BmZqZ1mdRN4g0AD59FYvam6xrtc8mY5qhV3UrrMhERERGpoi5u0aqX9Jw5c1C6dGncunULAwcOhEwmQ+XKlXMc01AikWDJkiXaZJVrffv2hVQqxZ49e+Dt7Y1r165BX18fFSpUQKtWrTBy5EiULl06SxpHR0e4u7tj3bp18PLywvPnz1GuXDmMHDkSY8eOZQ9pIiIiImj5hFEul0MikeR6rEKJRFJoXl+LiU8YiYiI6Euh8yeM48ePz1uJiIiIiOiLwYCRiIiIiNTSqpc0EREREX09dBIwCoKA6OhohIaG6mJ3RERERFSIaPVKWuHu3bvYtm0bbt++jZSUFEgkEvj5+SnXb926FS9evMDMmTMLzawwRERERKQZrZ8w7ty5E4MHD4anpyeSk5MhCEK2XtOmpqZwd3fHpUuX8lxQIiIiIioYWgWMd+/excqVK1GsWDHMmjULFy9eRL169bJt
17FjRwiCwICRiIiI6Aum1SvpXbt2AQAWLVqELl26APgw1uKnrKysUK5cOQQGBuahiERERERUkLR6wvjff//BwsJCGSyqY21tjYiICG2yISIiIqJCQKuAMS4uDjY2NrnaNjMzE6mpqdpkQ0RERESFgFYBo7m5OcLCwj67XWZmJl69epVtDmciIiIi+nJoFTA6OjoiJiYmxzmSFf755x8kJCSo7BBDRERERF8GrQLGPn36QBAEzJs3D0FBQSq38fb2xsKFCyGRSNCnT588FZKIiIiICo5WvaQ7dOiAjh074vz583BxcUGTJk0QEhICAFi7di28vb1x+/ZtZGZmonv37mjWrJlOC01ERERE4tF64O7ffvsN3333HVJTU3H58mWEh4dDEARs2rQJN2/ehCAI6NevHxYvXqzL8hIRERGRyLSeGtDIyAgLFizA0KFD8c8//8Df3x/v3r2DsbExpFIpnJ2dIZVKdVlWIiIiIioAeZpLGgAqV66M0aNH66IsRERERFQIaf1KmoiIiIi+DgwYiYiIiEgtrV5Jt2vXTqPtJRIJ/v33X22yIiIiIqICplXAqBhCJ7ckEok22RARERFRIaBVwLh3794c1yUnJ+PFixc4dOgQgoKCMHPmTPaWJiIiIvqCaRUwNm7cWO361q1bw9XVFXPnzsW6detw7NgxrQpHRERERAUv3zq96OvrY86cOUhJScG6devyKxsiIiIiymf52kva1NQU1apVw7Vr1/IzGyIiIiLKR/k+rM67d+8QGxub39kQERERUT7J14Dx1q1bCAkJQZkyZfIzGyIiIiLKR1p1erlz506O6wRBQGRkJP777z8cPnwYAODk5KRd6YiIiIiowGkVMLq6uuZqbEVBEFCnTh2MGzdOm2yIiIiIqBDQKmAsX758juskEgmMjY1hZ2eHtm3bwsXFBfr6+loXkIiIiIgKllYBo4eHh67LoTNxcXHYuXMnPDw88Pr1awBA2bJl0aBBA0yYMAFly5bNsn1wcDDWrl0LLy8vxMXFoVy5cnB2dsaYMWNgbGxcEFUgIiIiKlTyvZe0mPz8/NCpUyds3rwZ7969Q7NmzdCkSRMAwOHDhxEcHJxtexcXF5w4cQKlSpXCt99+i7S0NGzduhXfffcdEhISCqIaRERERIWKVk8YC6OoqCgMHz4c7969w/z58zFgwIAs7SwDAwNhbm6u/P/79+8xdepUJCYmYuLEiRg7diwAIC0tDePHj4enpydWrlyJhQsXil4XIiIiosKkyDxhXLVqFWJiYjBq1CgMHDgwW6ecqlWronTp0sr/e3h4IDAwENWrV8eYMWOUy42MjPDrr7/CwMAAR44cQUxMjGh1ICIiIiqMtHrCaG9vn+eMJRIJ/Pz88rwfAIiPj8epU6dgaGiIYcOG5SqNoh1mp06dsgWXZcuWRf369XH79m14enqiR48eOiknERER0ZdIq4BREIQ8Z6yLfSh4e3sjNTUV9erVg7m5OTw8PODl5YWkpCRUqFABHTt2RLVq1bKk8ff3BwA4ODio3KejoyNu376NJ0+e6KycRERERF8irQLGixcv4t9//8XKlStha2uLwYMHo0aNGrCyskJUVBQCAgLwxx9/ICQkBNOnT0e7du10Xe4sAgICAAA2NjYYPnw4bty4kWX92rVr4ebmhkmTJimXhYaGKtOoouhNHRISkg8lJiIiIvpyaBUwRkREYOXKlejWrRsWL14MPb3/3xSyatWqaNSoEQYMGIA5c+ZgxYoVqFOnDurUqaOzQn8qLi4OAHD+/HlIJBLMmDEDXbt2hb6+Ps6dO4fly5dj06ZNsLW1Rd++fQEAiYmJAIASJUqo3KeJiUmW7YiIiIi+Vlp1etmyZQuMjIzw888/ZwkWPyaRSDB//nwYGRlh8+bNeSrk52RmZgIAMjIyMGbMGIwYMQJly5aFlZUVBg4ciClTpgAANm7cqLKcqujylTkRERHRl0yrgPHBgweoVq0aihcvrna74sWLo2rV
qvjvv/+0ySbXTE1Nlf9WPEH8WL9+/QB8eA0dFBQE4P8/QUxKSlK5T8VyxXZEREREXyutAsakpCRER0fnatuYmJgcgzJdsbW1BfBhSJwyZcpkW29sbIxSpUoBACIjIwH8/7aLYWFhKvcZHh4OQP00iERERERfA60CxkqVKiE0NBRXrlxRu92VK1cQEhKCSpUqaVW43FL0dE5LS1PZ5vD9+/eIj48HAOV0f4qhgR49eqRyn76+vgAAuVyu8/ISERERfUm0Chh79+4NQRAwefJk7NmzBykpKVnWp6SkYO/evZgyZQokEgl69+6tk8LmpHr16qhcuTIA4ObNm9nW3717F+np6TA2NkbVqlUBAG3btgUAnDt3Llt7xYiICNy7dw8GBgZo3bp1vpadiIiIqLDTKmB0dXVFixYtkJiYiGXLlqFZs2bo2rUrhgwZgq5du6JZs2ZYunQpEhIS8M0338DV1VXX5c7Gzc0NALBixYosc0aHhYVh0aJFAD60bzQyMgLwIWCsXLkyAgICsGXLFuX2aWlpmD9/PjIyMtC7d29YWlrme9mJiIiICjOJoGV34PT0dGzatAl79+5FQkJCtvWmpqZwdXXF2LFjYWhomOeCfo4gCJg1axbc3d1RokQJ1KlTB/r6+vjvv/+QmJiI+vXrY+fOnVmG0Xn06BFcXV2RlJQEuVwOOzs7PHjwAGFhYZBKpThw4ADMzMy0LlPDhg0BfHjCqcrDZ5GYvem6RvtcMqY5alW30rpMRERERKqoi1u0GocRAAwNDTFhwgSMGjUK9+7dQ2BgIBITE2FiYoKqVauiQYMGOY5xmB8kEgmWL1+Oxo0b4++//4aPjw8yMzNRuXJldOvWDYMHD1Y+XVRwdHSEu7s71q1bBy8vLzx//hzlypXDyJEjMXbsWPaQJiIiIkIeAkaFEiVKoEWLFmjRooUuypNnvXv31qjNpJ2dHX777bd8LBERERHRly3PASN9GRKS0/EiJE6jNFVszWFaIv+bExAREVHhlqeA8dWrV9izZw+8vLwQFhaG1NRU+Pn5KdcfPnwYYWFhGDZsGF/vFrAXIXFsL0lERERa0TpgPHPmDGbPno3U1FTlsDSfTrMXFxeHDRs2oFq1aujUqVPeSkpEREREBUKrYXX8/f0xY8YMpKWlYeDAgdi7d69y8OyPOTs7QxAEXLx4Mc8FJSIiIqKCodUTxu3bt+P9+/eYPXu2cozFYsWKZdvO1tYWpUuXxtOnT/NWSiIiIiIqMFo9Ybx9+7ZynMXPKVeunHJeZiIiIiL68mgVMEZHR6NixYq5y0BPD0lJSdpkQ0RERESFgFYBo6mpKSIjI3O17evXr2FhYaFNNkRERERUCGgVMMpkMrx9+xb+/v5qt7t27RpiYmJQu3ZtrQpHRERERAVPq4Cxe/fuEAQBCxYsQGxsrMptgoODMX/+fEgkEnTv3j0vZSQiIiKiAqRVL+mePXvi6NGjuHfvHrp27QpnZ2e8ffsWAHDkyBF4e3vj9OnTSElJQfPmzdGxY0edFpqIiIiIxKNVwKinp4dNmzZh6tSpuHr1Kvbv369cN3fuXOVA3s2bN8eaNWt0UlAiIiIiKhhaz/RSsmRJbNu2DV5eXjhz5gz8/f3x7t07GBsbQyqVolOnTvj22291WFQiIiIiKghaBYx37twBANStWxfNmjVDs2bNdFooIiIiIio8tAoYXV1dUbZsWXh6euq6PERERERUyGjVS9rc3Bxly5bVdVmIiIiIqBDSKmCUSqUIDQ3VdVmIiIiIqBDSKmDs378/IiMjcfr0aV2Xh4iIiIgKGa3aMHbt2hUPHz7E7NmzERISgj59+qBUqVK6LhsRERERFQJaBYzt2rUDAGRkZGD16tVYvXo1LC0tUaJECZXbSyQS/Pvvv9qXkoiIiIgKjFYBY0hISLZl0dHROW4vkUi0yYaIiIiICoFcBYwJCQnQ19dXPkHcu3dvvhaKiIiIiAqPXAWMDRs2RMOGDbFv3z4AQOPG
jZXrLl68CAsLCzRo0CB/SkhEREREBSrXvaQV80N/aty4cVi9erXOCkREREREhYtWw+p8KqdgkoiIiIi+fDoJGImIiIio6GLASERERERqMWAkIiIiIrUYMBIRERGRWrkeuPvNmzdYv369xusUxo8fr1nJiIiIiKhQ0Chg3LBhg8p1oaGhOa5TYMBIRERE9GXKVcDYqFGj/C6HzgmCgCFDhuDWrVsAgDNnzqBatWrZtgsODsbatWvh5eWFuLg4lCtXDs7OzhgzZgyMjY3FLjYRERFRoZOrgPGPP/7I73Lo3N9//41bt25BIpHkOE6kn58fBg0ahMTERMhkMtSrVw8+Pj7YunUrPD09ceDAAZiamopcciIiIqLCpUh2egkLC8PKlSvRsmVLlC9fXuU279+/x9SpU5GYmIiJEyfixIkTWLduHS5cuIDWrVvjyZMnWLlypcglJyIiIip8imTAOH/+fGRmZmLBggU5buPh4YHAwEBUr14dY8aMUS43MjLCr7/+CgMDAxw5cgQxMTEilJiIiIio8CpyAaO7uzs8PT0xceJEVKhQIcftPDw8AACdOnWCRCLJsq5s2bKoX78+0tPT4enpma/lJSIiIirsilTAGBkZiaVLl8LR0RGurq5qt/X39wcAODg4qFzv6OgIAHjy5IluC0lERET0hSlSAeMvv/yChIQELF68GPr6+mq3DQ0NBQDY2NioXF+2bFkAQEhIiG4LSURERPSFKTIB4/nz53Hu3DkMHz4ccrn8s9snJiYCAEqUKKFyvYmJSZbtiIiIiL5WRSJgjI2NxcKFC1GpUiWMGzdOo7Sftl9UyGkoHiIiIqKvTa5neinMli5disjISOzevRvFixfPVRoTExPExsYiKSlJ5XrFcsWTRiIiIqKvVZEIGC9evIhixYph48aN2LhxY5Z1b9++BQDMnDkTJUqUwMCBA+Hs7AwbGxvExsYiLCxM5Svs8PBwAMhxHEciIiKir0WRCBgBIDU1Fbdv385x/cOHDwEA7dq1AwDY29vj8ePHePToEb799tts2/v6+gJArtpDEhERERVlRSJgvHv3bo7r2rZti5CQkGxzSbdt2xZHjx7FuXPnMG7cuCxtGSMiInDv3j0YGBigdevW+Vp2IiIiosKuSHR60Ubbtm1RuXJlBAQEYMuWLcrlaWlpmD9/PjIyMtC7d29YWloWYCmJiIiICl6ReMKoDX19faxatQqurq5YvXo1zp49Czs7Ozx48ABhYWGQSqWYPn16QReTiIiIqMB9tU8YgQ+zubi7u6Nbt26IjIyEh4cHDA0NMXLkSPz1118wMzMr6CISERERFbgi/4RRMWd0Tuzs7PDbb7+JVBoiIiKiL89X/YSRiIiIiD6PASMRERERqcWAkYiIiIjUYsBIRERERGoxYCQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESkFgNGIiIiIlKLASMRERERqcWAkYiIiIjUYsBIRERERGoxYCQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESklkFBF4CKpoTkdLwIidMoTRVbc5iWMMynEhEREZG2GDBSvngREofZm65rlGbJmOaoVd0qn0pERERE2uIraSIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBIRERGRWgwYiYiIiEgtBoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqGRR0AXQhPT0dt2/fxuXLl+Ht7Y2QkBAkJCSgTJkyaNq0KUaMGIFq1aqpTBscHIy1a9fCy8sLcXFxKFeuHJydnTFmzBgYGxuLXBMiIiKiwqdIPGG8c+cOhg8fjr179yI8PBx16tRBy5YtkZmZiSNHjqBHjx74999/s6Xz8/ODi4sLTpw4gVKlSuHbb79FWloatm7diu+++w4JCQkFUBsiIiKiwqVIPGGUSCTo0KEDhg0bhgYNGiiXZ2RkYNWqVdi5cydmzZqFCxcu
wNLSEgDw/v17TJ06FYmJiZg4cSLGjh0LAEhLS8P48ePh6emJlStXYuHChQVSJyIiIqLCokg8YWzatCnWr1+fJVgEAAMDA0yfPh2VK1dGfHw8PD09les8PDwQGBiI6tWrY8yYMcrlRkZG+PXXX2FgYIAjR44gJiZGtHoQERERFUZFImCUSCQ5rtPT04NcLgcAhIeHK5d7eHgAADp16pQtfdmyZVG/fn2kp6dnCTKJiIiIvkZF4pX05wQFBQEArKyslMv8/f0BAA4ODirTODo64vbt23jy5En+F5DyLCE5HS9C4jRKU8XWHKYlDPOpREREREVHkQ8Yvby84Ofnh2LFiqFVq1bK5aGhoQAAGxsblenKli0LAAgJCcn/QlKevQiJw+xN1zVKs2RMc9SqbvX5DYmIiL5yReKVdE6io6MxZ84cAMCoUaNgbW2tXJeYmAgAKFGihMq0JiYmWbYjIiIi+loV2SeMqampmDBhAkJCQtC0aVNlL+hP5dT+URCE/CwefeH4CpyIiL4mRTJgzMjIwMSJE3Hnzh04ODhgw4YN0NfXz7KNiYkJYmNjkZSUpHIfiuWKJ41EH+MrcCIi+poUuYDx/fv3mDZtGi5dugSpVIodO3bA1NQ023Y2NjaIjY1FWFiYshf1xxQ9qsuXL5/vZSb6HLGfaBb1/IiISDNFKmAUBAGzZ8/G2bNnUblyZezatUs5UPen7O3t8fjxYzx69AjffvtttvW+vr4AoDKYJBKb2E80i3p+RESkmSIVMP78889wd3dHhQoVsGfPnizD6Hyqbdu2OHr0KM6dO4dx48ZlacsYERGBe/fuwcDAAK1btxaj6EQkoqL+BJVPbIlI14pMwLh06VL8/fffsLGxwZ49e1CuXDm127dt2xaVK1dGQEAAtmzZgtGjRwP4MDXg/PnzkZGRgf79++f4hJKIvlxF/Qkqn9gSka4ViYDx33//xe7duwEA5cqVw5o1a1Ru17FjR3Ts2BEAoK+vj1WrVsHV1RWrV6/G2bNnYWdnhwcPHiAsLAxSqRTTp08XqQZEREREhVeRCBjfvXun/Pf9+/dx//59ldvZ2dkpA0bgw2wu7u7uWLduHby8vPD8+XOUK1cOI0eOxNixY9lDmojoM4r663bmx/yY3wdFImDs1asXevXqpVVaOzs7/PbbbzouERHR16Gov25nfsyP+X1QpGd6ISIiIqK8Y8BIRERERGoxYCQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESkFgNGIiIiIlKLASMRERERqcWAkYiIiIjUYsBIRERERGoxYCQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESkFgNGIiIiIlKLASMRERERqcWAkYiIiIjUYsBIRERERGoxYCQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESkFgNGIiIiIlKLASMRERERqcWAkYiIiIjUYsBIRERERGoxYCQiIiIitRgwEhEREZFaDBiJiIiISC0GjERERESklkFBF6AwSE9Px65du3D8+HEEBwfD2NgYDRs2xNixY1GzZs2CLh4RERFRgfrqnzCmp6dj5MiRWLVqFSIjI9GqVSvY2dnhwoUL6NevH65fv17QRSQiIiIqUF/9E8bt27fj5s2bcHBwwO7du1GyZEkAgLu7O2bOnIlp06bh33//hYmJSQGXlIiIiKhgfNVPGDMyMrB7924AwIIFC5TBIgD06NEDLVu2RHR0NI4cOVJAJSQiIiIqeF91wOjt7Y3Y2FjY2tqidu3a2dZ36dIFAHDx4kWxi0ZERERUaHzVAaO/vz8AwNHRUeV6BweHLNsRERERfY2+6oAxNDQUAFCuXDmV6xXLY2NjkZiYKFq5iIiIiAoTiSAIQkEXoqDMmzcPBw8exOjRozF58uRs6zMyMpRPGa9evYoyZcpotH+5XA5BEGBmZqZyvSAAmRoefj2JBBKJRklEz4v5MT/m9/XkV5TrxvyY39eWX3x8PCQSico3q199
L2kAkGj7SX2Gnp4eMjMz1eQL6OdT3gWZF/Njfszv68mvKNeN+TG/ry0/iUQCPT3VL5+/6oBRMVROUlKSyvUfL9dmWB0/Pz/tCkZERERUiHzVbRhtbGwAAGFhYSrXK5ZbWFhwHEYiIiL6an3VAaO9vT0AwNfXV+V6xRNCuVwuWpmIiIiICpuvOmCsX78+LCws8Pr1azx8+DDb+jNnzgAA2rVrJ3bRiIiIiAqNrzpgNDAwwODBgwF8mOklPj5eue7kyZPw9PSEpaUlevXqVVBFJCIiIipwX/WwOgCQlpaGESNG4Pbt27C0tETDhg0RGRmJ+/fvw9DQEBs3bkSrVq0KuphEREREBearDxiBD0Hjzp07ceLECQQHB8PY2BgNGjTAuHHjlOMwEhEREX2tGDASERERkVpfdRtGIiIiIvo8BoxEREREpBYDRiIiIiJSiwEjEREREanFgJGIiIiI1GLASERERERqMWAkIiIiIrUYMBZSwcHB8Pf3L+hikBb42emW2MezqH9+Rb1+RJQ/OHB3IeXq6op79+7Bz89PJ/tLS0uDn58fjIyMUKNGDRgaGirXeXt7w8PDA9HR0bCzs0PXrl1ha2ub5zwDAgJw8eJF+Pv7IyQkBImJiQAAExMTlC9fHvb29mjXrh2kUmme8/rU8+fPsW3bNty6dQuRkZEwNjaGo6MjBg0ahDZt2ug8v48Vhc8uLCwM4eHhsLKy+uz+AgMDERUVhUaNGuU5X1V0fTwLIr+CvBY+pev6Xbt2DT4+PrC0tISTkxNKlSoFAAgPD8eaNWtw48YNxMXFwdbWFp07d8bIkSNRrFixLyY/VXx8fHD16lVERETA2NgYtWrVQvv27WFkZKTTfICCvZd9ytPTEzExMejRo4fW+xD7WhDzXlbUv2cZMBZSrq6uuHv3Lh4/fpznff3777+YM2cO3r17BwAoX7481qxZg1q1amH9+vXYsGEDAEAQBEgkEhQrVgzLli2Ds7OzVvlFRERg7ty5uHr1qnK/qkgkEgBAixYtsGjRIpQtW1ajfFauXIk9e/bgjz/+QL169ZTLPTw8MHnyZKSlpWXLWyKRYOjQoZg5c6ZGeWniS/7sgoODMWvWLHh7eyuX2dvbY9asWWjcuLHKNNOnT8epU6d0Ul9VdHk8xc5PrGtBE7qqnyAImDx5Ms6dO6dcZmZmht27d8Pa2hr9+/dHaGholjQSiQT169fH3r17oa+vX2jz27ZtG6RSKVq3bp1leUpKCn766Sf8888/yjIpPjsbGxts3LgRcrlco3oBhfde9qn+/fvDx8dHq3NH7GtB7HtZUf2ezbIvBoziatGiRa62i4uLQ0ZGBkqXLq1cJpFIlCdHbgUEBKBXr17IyMhApUqVoKenh5cvX6JMmTL4/fffMWjQINSoUQPOzs4oVaoUvLy8cOHCBRQrVgwnT55ExYoVNcovOjoavXv3xps3b2Bubo62bdvC0dERZcuWhYmJCQRBQFJSEsLDw+Hr6wsPDw/ExsaiXLlyOHr0qPJpQW707t0bkZGR8PT0VC6LiYlB+/btkZiYiLZt26J///6oUKECYmJi4OXlhV27diE5ORnr1q1D+/btNapbUf/s4uLi4OLigrCwMABAqVKlEBcXh/fv30NPTw9jx47F+PHjs6XT9iYr9vEUOz8xr4WCqN/hw4cxd+5cmJqaonPnzgCA06dPw87ODvXr18eBAwfw/fffo1OnTjAzM8P9+/exdu1axMTEYPbs2XB1dS20+cnlcnTv3h0rVqzIsnz69Ok4efIkDAwM0LZtW1SpUgWxsbG4dOkSIiIiYG1tjZMnT8LCwkKjuol9L9OWtgGj2NeC2Peyovw9+zEDrVKR1iIjIyGRSHL8NaBqewXFLwVN7Nq1CxkZGZg1axaGDh0KADh06BDmzZuHWbNmoWHDhti5c6fyVcr333+PLVu2YPXq1Thw4IDGv17Xr1+PN2/eoF+/fpgzZ85nXwWlpaVh0aJFOHjwINavX4/5
8+fnOq/Xr1/DwcEhy7IzZ84gMTERAwcOxLx587Ksa9iwIZo1a4YhQ4Zg3759Gt9ki/pnt3PnToSFheGbb77B0qVLUbZsWbx79w67d+/Gtm3bsGHDBkRGRmLBggUa10UVsY+n2PmJeS0oyitm/dzd3aGvr499+/Ypn6p999136NOnD54/f44RI0Zg6tSpyu1lMhnq1KmDPn364PTp0xoHjGLn96knT57g5MmTMDMzwx9//JHlSeLs2bMxduxY3LhxA/v378e4ceM02rfY9zKxiX0tiH0vK8rfsx9jwCgyCwsLvHv3Dt999x2GDx8OPT3V/Y6mTJkCHx8f/Pvvv3nK7/bt26hQoYLyJAaAvn37Yvv27QgKCsLixYuztbsZOnQodu3aBS8vL43zu3TpEipXroxffvklV9sbGRnhl19+we3bt3Hp0iWNTuTk5GSULFkyy7KnT58qX9Wo0rBhQ9SqVQu+vr65zkfha/jsSpYsidWrV8Pc3BwAULJkSUyYMAEtWrTAjz/+iL///hvJyclYtmyZVkHGx8Q+nmLnJ+a1AIhfv4CAANStWzdL4FSzZk3Uq1cP3t7e6NevX7Y09vb2qFWrFp4/f17o8/uU4unfuHHjsr12LlasGBYtWgRnZ2dcvnxZ44BR7HtZbp9Gfyo2NlardGJfC2Lfy4ry9+zH2EtaZGfPnoWzszP+/PNPTJw4Udk4+9M/xcn16XJNRUREoHr16tmWK5apam9TrFgx1KhRA69fv9Y4v8jISK0a10qlUkRFRWmUxtraGi9fvlS5ztLSMsd0FhYWSEtL0ygvoOh/dsHBwahVq5byBvux+vXr46+//oKtrS1OnDiBSZMm4f379xrn8TGxj6fY+Yl5LQDi1y8pKQlWVlbZliuWlSlTRmU6a2trJCcnF/r8PvX69WtIJBK0bdtW5XobGxvI5fIc70nqiH0vi4yMRFRUFCIjIzX6y8jI0DgvRX5iXgti38uK8vfsxxgwiszS0hK///47NmzYgLdv36Jfv35YsWIFUlJS8iU/Q0PDLD21FIyNjQFA5QUFfLgJa1MmKysrBAQEaJwuICAgS5uq3GjatCmePHmCe/fuKZcpXuvcunVLZZrk5GQ8fPhQqy/Iov7Z6enpwcTEJMf1FStWxP79+1GlShWcP38e48eP1+rLSkHs4yl2fmJeC4D49TM3N0dISEi25YovwBcvXqhM9/Lly2xP0wpjfjkpV65cjutsbGy0Ck7Fvpcp2li6u7vj4sWLuf6zt7fXOC9A/GtB7HtZUf6e/RgDxgLSrl07nDlzBt27d8fOnTvRrVs3XL9+Xef5lCpVChEREdmWW1lZqW1om5CQoHHDbQBo3bo1Xr16hZ9//jlXF2BaWhoWLFiAV69e4dtvv9Uor+HDh0NfXx8TJkzAzZs3AUA5VMGCBQuy9I4DPjQUnjZtGqKjo5WN5rVRVD+78uXLIzAwUO02ZcuWxb59+yCVSnH58mWMGTMmzwGJWMdT7PzEvBY+Jlb9FK9DP+6ocfnyZfj6+qJ06dJYv359tvaUZ86cwdOnT7UKPMTOLzk5GaGhoco/RUeBt2/f5pgmMTFRq+BU7HuZo6MjgA+vmFU9hc7pT9thg8S+FsS+lxXl79mPsZd0IXDjxg3MnTsXb968Qffu3TFr1ixMmDBBJ0NfuLm54fbt27hz5w4MDHLfZLVly5YoV64cDh06pFF+0dHRcHFxQWRkpLL3loODA2xsbFCiRAlIJBIkJSUhLCwsS+8ta2truLu7a9x76+jRo5g7dy4EQYCjoyO++eYbSCQSbN++He/fv0flypVha2uLuLg4PHv2DMnJyZDJZDh48KBOxmYrSp+doofghQsXUKFCBbXbvnv3DsOGDYOvr6+y/Y8uhqHJz+Mpdn5iXwti1+/atWsYOXIkDAwMUL9+fQAfxprT19fHjh07MHjwYNjb26Njx44wMTGBj48PTp06BUEQsGrVKo0DHTHzk8vlObZrW7NmDZycnFSu
a9myJUqXLg13d3eN6gaIey/73//+h82bN2PatGkYMWJErtPlpZe0mNeC2Peyov49q8CAsZBISkrCqlWrcODAAVhYWMDAwACRkZF5vqmvX78e69evx/79+9GgQYNcpbl69SpGjRqFYcOGaTXG15s3bzB79mxlY96cbryKU69p06ZYsmQJypcvr3FeAODl5YWff/4ZQUFByrxUndZ6enpwcXHB7NmzYWZmplVeqhSVz+706dOYOnVqrtMmJCRg1KhRuH//PiQSic4Cuvw6ngWRn9jXgir5Wb8dO3Zg9erVyrZtxsbGWLJkCZydnbFr1y4sX7482zXZs2dPLF26tFDnl1M7RcW6uXPnZlt+48YNDB8+HH379sWvv/6qUX4KYt3LPD09MW3aNDg5OWHRokW5Trdx40YEBwdr9fmJeS2IfS/7Gr5nAQaMhc7du3cxZ84cvHr1SidfwqmpqYiPj4eZmVmuf4WeOXMG//33H3r27Kl1mxXgw6+0ixcv4smTJypHoJfL5WjXrl2e8lAQBAHXr1+Hl5cXnj59iri4OGRmZsLU1BS2traoWbMm2rVrl68DIn/pn11CQgK2b9+O4sWLY/To0blKk5KSglWrViEhIUHrICAnuj6eBZmfmNdCTvKrfhEREfjvv/9gaGiIunXrZumkcefOHRw/fhxv3rxBmTJl0KFDB7XBWGHML7fOnTuHe/fuoWPHjmjYsKHW+ykM97L8JMa1IPa97Gv5nmXAWAilpaUp219oM2sAFRx+drol9vEs6p9fUa8fEeUfBoxEREREpBYH7iYAwP79+xEUFISffvqpoIuiM8nJyXjw4AGioqJgbGwMBweHHMdq0yWxjmVB1S8/JSUlITQ0NMtrFRsbG7VDZHxJ+RWkgjhfEhIS8PDhQ2Vv3Fq1auV5kOTPEeP6E+NYfk3npkJ+fnYFfTwL4lrQNT5hFFmHDh3QuHFj9OjRA40aNSro4ii5urrmW0/U3Fi4cCECAwOxZ8+eXKd5/vw5/Pz80LBhQ9jY2CiXZ2ZmYt26ddi9e3e2YRI6dOiABQsW6KQHak50dSwLU/3Cw8OxefNmXL16FRERETA2NkatWrUwePBgtGzZMk/7DgsLw549e3Dx4kUEBwer3KZixYpo164dBg8enOVYfAn5qZIfx7Mgzpfg4GCsWbMGPj4+sLS0RJ8+fZQzrhw7dgxLly5FfHy8cvtKlSph2bJlqFevnlb55YYurr+CuvbEPjdfvHiBs2fPIjw8HNbW1mjfvr3apgoHDx7E/fv3dd5eWUHX30NiHk8xr4WCiiMYMIrs4+Eaypcvj549e8LFxUXjycdz69q1a7nabsWKFXj69Cm2b9+epVeetlNIaUqb4RqmT5+O06dP49KlS1kagM+ZMwdHjx6FIAgoVaoUbG1tERsbi9DQULx//x7VqlXDwYMHNf5lKfaxFLt+nTt3Rrt27bLMxwsAfn5+GDFiBGJjY7P12JRIJJg0aRLc3Nw0ykvh2LFjWLBgAdLS0pT7NjMzg7GxMQRBQHJycpabrJGREX7++Wf07t270Ocn9vEU+3yJioqCi4sLoqKilPWQSCSYMWMG5HI5RowYAUEQUKFCBZiZmSEwMBApKSkwMTHB8ePHPzvcyafEvP7EPpaA+NfCn3/+icWLF+P9+/cQBEH5vdSzZ0/MmzcPJUqUyJZGMVyNpgFdQXwPiXk8xb4WxI4jFBgwikwul8Pa2hopKSmIj49XfugNGzZEjx494OzsrNNH5OrGE8sNsZ44ahMwdujQASYmJlnGPHv06BH69OkDc3NzLF68GO3bt1euCwoKwpw5c3D37l2MGTMGEyZM0KiMYh/Lgqhf9+7dsWLFCuWy9+/fo1OnTggKCkK9evXwww8/oEqVKoiLi8OZM2fwxx9/AAD++usv1K5dW6P8bt26haFDh0JfXx99+vRBly5d4ODgoJwdQSEpKQl+fn44ffo0Dh06hMzMTOzatQtNmjQp1PmJfTzFPl+WLl2K
PXv2oGXLlhg3bhwEQcD69evh6+sLqVSKoKAgrFu3DrVq1QIAxMfHY/bs2bhw4QIGDBig8Xy2Yl5/Yh9Lsc9NHx8ffPfdd8jMzMQ333wDe3t7BAUF4fLly8jIyECNGjWwY8cOWFtbZ0mnbcAo9r1T7ONZENeCmHGEAtswFoBmzZph0aJFuHDhAo4dOwYvLy/cuXMHd+/exaJFi9C+fXv06NEDzZs310l+EokEderUUTugqL+/PxISEvI0HASQ+1+Sn/r4l15uvX37FjKZLMuyS5cuQSKRYP78+Vlu6MCHVwAbNmxAx44dce7cOY1v6oC4x7Ig6vepS5cuISgoCA4ODti3bx/09fWV6+rUqYOyZctixYoV+PPPPzUOcLZu3Qo9PT3s3LkTjRs3znE7Y2NjNGzYEA0bNoSzszOGDRuGrVu3anxTFzs/VfLzeIp9vly5cgWWlpZYu3at8mnU+vXr0aZNG9y5cwf/+9//lF+QwIenO8uWLcPdu3e1nnlGrOtP7GMp9rm5e/duCIKAOXPmwNXVVbk8MDAQ06dPh6+vLwYOHIhdu3ZpNfWgKmLeO8U+ngVxLYgdRwAMGAuMkZERunTpgi5duuDt27c4ceIE3N3d8fTpU5w8eRKnTp1CmTJl0KNHD7i4uKBq1apa5dOjRw+4u7sjKSkJv/76a45fQoq2I4onHNoaOXKkVr8kP34lklsGBgbZJo0PCwsDgBzbgZUsWRJ16tRRTr+lCbGPpdj1U0UxkO3kyZOzBDcKQ4YMwa5du7LMgZtbjx49QqNGjdTe0D/VpEkTNG7cGA8fPiz0+amSn8dT7PMlNDQUTZs2zfLqskSJEqhTpw6uXLmi8jibmJjA0dERt2/f1jg/Ma8/sY+l2OfmvXv3YGdnlyVYBICqVavizz//xMyZM3H27FkMGjQIu3fvhp2dncZ5fEzse6fYx1Psa0FBrDhCgXNJFwLW1tYYMWIETp48iaNHj2LQoEGwtLREeHg4tm7dii5duqBfv374888/Nd73smXLsG3bNsTHx+P777/HsmXLkJycnA+1+ODjR+Oa/Gnz+NzOzg4+Pj5ZbuyKSd7VPbGMj4/XalpAsY+l2PVTJSYmBgBQs2ZNlev19fUhlUoRHh6u8b7T0tK0+tyNjY2Rnp5e6PNTJT+Pp9jniyAIKoNePb0PXyu67gEq5vUn9rEU+9yMioqCVCpVuc7IyAi///47+vfvjzdv3mDgwIF49uyZxnl8TOx7p9jHU+xrQZX8jCMUGDAWMjVr1sTcuXNx9epVbNy4Ee3bt4eBgQF8fHzwyy+/aLXPli1b4tSpU+jbty/27NmDrl274sqVKzou+QeVK1cGACxZsgR//PFHrv+qV6+ucV5OTk6IiorChg0blMtat24NQRBw8OBBlWn8/Pzw8OHDHL+wP0fMY1kQ9fuU4kuyePHiOW5jZmam1Q2xSpUq8PLyQkRERK7ThIeH4+bNm6hSpUqhz0+V/DyeYp8vNjY28PX1VU7TBwDp6enw9fUFAJVPTpKSkvDo0SOtZykR6/oT+1iKfW5aWFioDdgkEgkWLlyIYcOGITIyEq6urvDz89M4n4+Jee8U+3gWxLWgTn7EEQADxkJLX18fbdu2xbp163Dt2jXMmzcvSxsITRkbG2PBggXYu3cvDAwM4ObmhmnTpiE6OlqHpQYcHR0BIM83l9wYMmQI7OzssGnTJsycORPPnz9HkyZN4OLigm3btmHevHl48OABIiMj8fz5c+zbtw+jRo1CRkYGBg8erHW+Yh3LgqjfkydPlPOirl+/Hi9evAAAhISE5JgmPDwcFhYWGufVr18/JCUlYcCAATh37pzaX/YZGRk4f/48Bg4ciOTkZOVwFYU5P0Dc4yn2+dKiRQtERERg3rx5CA0NRWhoKObNm4e3b9+idevWWLp0aZbOCgkJCZg9ezZiY2Pz1B5UjOtP7GMp9rlZoUIFPHr06LPbzZw5E6NHj0ZMTAyGDh2a5yeNYt07xT6e
BXUtfI6u4wj2khaZqp6TYktNTcWaNWuwd+9emJmZYebMmTh69KhOxr/au3cvlixZgpEjR2LatGm5TtevXz88fPhQ4/xDQkIwYsQIvHz5EhKJBNbW1rCzs8P9+/eztUECPrw6GDduHH788UeN8slJfh5LQNz6fTr+mkQiUbYtnTp1KkaOHJktTUJCAlq0aIE6depoNIamwowZM3DixAlIJBIYGRmhRo0asLGxQYkSJSCRSJCUlISwsDA8ffoUqampEAQB3bp1w8qVKzXOS+z8CuJ4inm+REREoHv37oiLi8uyP6lUin379qF79+54+/YtKlSoABMTE7x48QIpKSkwMjLCsWPH8tyeCsjf60/se4uY5+aqVauwfft27NixA998881nt9+yZQtWr16tfPKti3tbft87xTyeYl8LBRVHMGAUWWEIGBV8fHwwe/ZsPH/+XLksrxfqixcv8Oeff0IqlaJPnz65Tufn54eEhASNGikrpKWlYdu2bfj7779zfAVhYGCAJk2awM3NTas8Pic/jqWCWPVbv359jusqVqwIFxeXbMv379+PX3/9FW5ubpg8ebJW+Z44cQLbt29HQECA2u1kMhmGDx+ushyFMb+COp5iXg+BgYFYvnw57ty5AyMjIzRv3hwzZsxA2bJl4e/vj4kTJ+LVq1fK7a2srLBkyRK0atVK6zxVya/rT+x7i1jnpre3NwYMGICmTZti9+7duUqzZ88eLFu2DIBuh1vLz3unmPcWMa8FBoxfiZCQEBgbG8PS0rKgiwLgQzuLTZs2wcPDAwCyjDv2JXr+/DkCAgIQFxeHzMxMmJqawtbWFjKZDKampvmatxjHsiDrp0poaCjevXuHcuXKafUa9WNv3rzB48ePVU7fZW9vj/Lly+ugxAWXX27o8ngCBX++ZGRk4MGDBwgLC4O1tTXq1q0LIyOjfMkrv68/MY9lfp+bgiAoZz6pVKlSrtM9fvwY8fHxOv/Rnd+fXWG41nV5LRRUHMGAkYiIiIjUYqcXIiIiIlKLA3cXkISEBPj7+8PS0hLVqlVTLhcEAe7u7rhx4wbi4uJga2uLzp07izrBeF7cvXsXjo6OaocNEdvo0aNx//593Lp1K0/7KYx1S0hIgLe3N6Kjo2FnZ4e6detqPeZXYatfWloafHx8EBERAWNjYzg4OGSbqkzXdHk8P5aeng4vLy88fvwYr1+/RmJiIiQSCSwsLFCjRg00bdpUOSRVfhH7eBbE5wfo7nrXRmRkJNLS0nT6ijMtLQ2nTp3CrVu3EBkZCRMTEzg4OKBXr175ejzz61ooDPl9jeemLjBgLAD79+/HihUrkJaWBgCoW7cuNm7cCDMzM7i5ueHGjRtZJl7/66+/MHToUMycOVOr/NLS0rBp0yacOnUK4eHhsLKyQseOHfHDDz+gVKlSKtNMnz4dp0+f1nh4nEGDBsHY2BjOzs7o0aNHvnQw0VRiYiLevXuX5/0URN2CgoKwePFieHl5wcjICK1bt8acOXNQqlQpeHp6YtasWYiNjVVuX716dfzvf//Tqgeq2PXz8fGBpaUlKlasmG3dn3/+iTVr1mT73Dp16oSFCxfCzMxMqzzFPJ7Ah7mjt27dip07dyIhISHb+o9nOPrmm28wd+5crcd8FPt4FsTnlxu6ut61MW7cODx8+FDj++axY8dw8uRJTJ06FQ4ODsrlL1++hJubG4KCgrJ8J1y4cAGbN2/GihUr0KFDB63KKva1IGZ+X8O5Keb3ugLbMIrs1q1bGDJkCIAPg1wLgoBXr16hQ4cOqF27NlatWgV7e3t06NABZmZmuH//Ps6ePQsA2LhxI9q0aaNRfpmZmRg2bBhu376d5YYjkUhQunRprFy5Es2aNcuWLi+TzH+cR/ny5dGzZ0+4uLiovHjzYurUqbnazsvLCzExMejcuXOW5atWrdIoPzHrBnyYFaRbt26IjIzMsrxu3br4/fff0aVLFwBA7dq1UapUKXh7eyM8PBw2NjY4deqU
xjMdiF0/uVwOFxcXLF++PMvyHTt24LfffoMgCChVqhQqVaqEuLg4vHjxQjkf7f79+1XOrKCO2Mfz/fv3cHNzw/Xr1yEIAszNzWFhYYGwsDCkpqbCwMAAffr0QUxMDG7fvo2YmBgUL14cGzduzNVQJ58S+3iKnZ/Y17s2+vfvDx8fH43vm0OHDoWvry+uX7+u7AiRnp6Orl274tWrV7C2tka3bt1QoUIFxMbGKucNNjQ0xMGDB2Fvb69RfmJfCwVxLyvK56bY3+tKAolq7NixglwuFw4fPqxcdvDgQcHe3l5o2bKlMHr0aCEjIyNLmuPHjwsymUxwc3PTOL8///xTkMlkQqtWrYQTJ04Iz549Ey5evCgMGjRIkMlkgoODg3Dy5Mls6aZNmybI5XKN85PJZMKAAQOEefPmCY0aNRJkMpkgl8sFuVwuDBw4UDh06JAQHx+v8X5zyksulwsymeyzf59uV9jrJgiCsHLlSkEmkwmTJ08WwsLChIiICOGnn34S5HK5MHToUKFNmzbCy5cvldunpaUJEydOFORyubB9+/ZCXz+ZTCZMnz49y7KIiAihVq1aQs2aNYUDBw4ImZmZynWPHz8WOnToIMjlcuHQoUMa5yf28dy1a5cgk8mEvn37Co8fP1Yuz8zMFI4dOyY0aNBA6N27t5CRkSGkpaUJ+/fvF2rXri3Ur19fCAsL0zg/sY9nQeQn5vWujX79+mmVV/PmzYUBAwZkWXb69GlBJpMJgwYNEpKTk7Ol2b17t/J81pTY10JB3MuK8rkp9ve6Al9Ji+zBgweoXr06evfurVzWt29f7N27F8+ePcPEiROz/brp3r07tm3bptWk6CdOnICBgQF27typbCtZrVo1tG3bFjt27MDvv/+OGTNmIDk5GX379s1b5f6Pra0tfvnlF8ydOxf//vsvjh07hhs3buDu3bu4d+8eFi1ahPbt26Nnz5745ptv8tRORSKRoF+/fqhbt26O22zduhUvX77E0qVLtc5HQcy6eXp6wtzcHEuWLFG2K1ywYAE8PT1x8+ZNrFy5EnZ2dsrtDQ0NMWfOHFy6dAkeHh4YMWJEoa6fKhcuXEBaWhq+//57fP/991nWyeVyLF++HAMGDMCZM2c0GucTEP94Hj9+HObm5ti8eXOWV0QSiQQ9evRAZmYm5syZg3PnzqFz584YMGAALC0tMXnyZGzfvh1z5szRKD9V8vN4FkR+Yl3vuX1i9KmPx9nTRFxcXLY2dP/99x8kEgl++uknle2KhwwZgoMHD6qcdu5zxL4WCuJe9qmicm4CBfO9DrANo+hiY2NRv379bMurVKmCZ8+e5dh+qXLlysppxTTx9OlT1KlTJ0vHGoURI0agSpUqmDx5MubPn4/k5OQ8TZn3KSMjI3Tu3BmdO3dGZGQkTpw4gePHj+PJkyc4deoUTp8+jTJlysDFxQUuLi4qy6jO7t27MW/ePBw+fBiGhoaYMmUKjI2Ns2139OhRvHz5Ej179tRV1fK9bgDw+vVr1K9fP8uXhZGREWrWrIlr166p7AhlbW0NuVyeZRDcwlo/VZ49ewaJRILvvvtO5fp69eqhRo0a8Pf313jfYh/Ply9fomHDhjm2J2rTpg0EQcC9e/eUr6g6deqENWvW4MqVKzoJGPPzeIqdn5jX++nTp5Uz82hKmx9R5ubm2V7XJiUlAYDaNq2VK1fWaj5msa+FgryXKRSVcxMouO91DqsjsuLFi2eZPkhBsezt27cq0719+1ar3qupqalqe3+1bdsWW7duRfHixbF06VJs3bpV4zxyw8rKCsOHD8fx48dx7NgxDB48GKVKlUJ4eDi2bduGrl27ajyHZ9OmTXHy5EkMGjQIBw4cQNeuXXH58uV8Kb86+VE34EMbOFUDAisGdM5p0vpy5cqp7GChrfyqnyqpqakAkOVpw6fs7Oy0ajgu9vHU09NDSkpKjusVnd4+nedWJpMhLCxM4/xUyc/jKXZ+Yl7vinvtwoULsXTp0lz/
advWt1atWspBnRUUA2qrm7/52bNnKF26tMb5iX0tFIZ7WVE5N4GC+15nwCiy6tWr4/79+wgJCVEuCw4Ohre3NwwMDHDo0KFsaZ48eYKHDx9q1VvMxsbms18+TZo0wa5du2BiYoLVq1dj7dq1GuejCXt7e8yePRtXrlzBpk2b0KFDBxgYGGj1yr148eL46aefcODAARQvXhxjxozBlClTEBUVlQ8l/zxd1s3S0lJlPQRBUPvkIz09XeNG4rmly/qpUqFCBQBQ+yWRmZmp8tf754h9PGvUqIGHDx8iMDBQ5fpjx45BIpFkG04nNTVVZ7Oh5OfxLIj8xLreFZ1IatasiZ49e+b6L6enyZ/z/fffIz09HZMmTUJMTAwAwMXFBcWLF8fixYuVs5N8bPPmzQgKCkLLli01zk/sa6Ew3MuKyrkJFNz3Ol9Ji6xHjx7477//8P333yufyhw8eBAZGRmYO3cuFi9ejJSUFDg7O8PExAQ+Pj5Ys2YNMjMz0bFjR43zq1KlCq5fv474+Hi1wwXUrVsXe/bswfDhw7Fp0yadfYGoo6+vjzZt2qBNmzZ49+4dTp8+rfW+6tatC3d3d2zYsAE7duzA9evXMWPGjCxtRcWki7pVrFhRZTOE+fPnY/r06Tmme/HiBcqUKaNxfprQ1Wf377//ol27dsr/JycnA/gwDZuVlZXKNC9evNDqi1ns49m7d2/MmzcPI0eOxKRJk9CkSRNYWlri9evXOHLkCPbs2QMDAwM4OztnSffy5UvY2NhonB8g7vEsiPwU8vt6r1WrFv777z/4+vqiVq1aOtmnOq1atUK/fv1w8OBBODs7o0+fPmjevDkmT56MFStWoF27dnBycoKtrS3i4uJw8+ZN+Pn5wdTUFGPHjtU4P7GvhYK4lxXVcxMouO91PmEUWb9+/fDtt98iIiICGzZswPr16xEREYGhQ4di4MCBcHJywp49ezBgwAC4uLhg3rx5iIyMhEwm06odQsuWLZGenp6ruTkdHBywd+9elCpVSuUv2vxUsmTJbA2RNWVkZITJkyfj0KFDKF++PObOnQtXV9cCe9qooG3dHB0dERUVhadPn2bbX06vcJ48eYIXL16gdu3aWpVVG3n57JKSkhASEqL8i46OhiAIyqGkPvXs2TMEBgZqPIwIIP7x7Nu3Lzp27IjQ0FDMnDkT3377LerUqYMuXbpg586dyMjIwMyZM7MM8vzs2TO8evVK64H6xTyeBZHfx/Lzeq9VqxYEQcCjR480SmdlZaV1sP/LL79g7NixSExMxM6dOzFixAgsXboU79+/R1xcHA4ePIjVq1dj586d8PX1RaVKlbB3716t8hP7WiiIe1lRPTeBgvte5xNGkenp6WHz5s04d+6cchyt5s2bo0WLFgCA5cuXo1q1ajhx4gTevHkDa2trdOzYEePGjdPqNVWbNm1w6tQpeHt7w9XV9bPby2Qy7Nu3D5MmTUJ8fLzG+TVq1EhnHSC0ZW9vj8OHD2Pbtm3YuHEj0tLSdNKbV+y6DRgwALVr19ZoINlr165BLpejbdu2Gucndv0uXryY47qczvUbN25ALpejdevWGucn9vEEgP/973/Yu3cv9u/fj6CgIAAfns7WqVMHbm5u2epha2uLixcvwtzcXOO8xD6eYueXk/y43tu3bw93d3eNX4du2LAhT/lOmDABPXv2xMGDB3Hz5k08ffpU2Q5WX18ftra2qFmzJtq3bw9nZ2cYGGj3FS72tSB2fkX53ATE/15X4MDdVKQFBgbi6tWrAKAcMJ2oICQkJCApKQkWFhY6a6NIWRXF6z0tLQ2ZmZmFZspO0k5RODcZMBIRERGRWmzDSPSFS0tLU7bPEYO3t7faVz5EaWlp2YYLIvoaiX0t5Gd+fMIosjlz5qBx48bo2LEjSpQoUeTy+5yzZ8/i6tWriIiIgLGxMRwdHdGnT5889Ux7//497t+/j4iICFhZWaFevXowNDTMcft79+4hODgYPXr00DrPS5cu
4eLFi/D390dISIiyMbGJiQnKly8Pe3t7tGvXDt9++22e2qxkZGTgyJEj8PHxgaWlJXr06IHq1asDAPz9/bF48WJ4e3srh4NwcnLCtGnT8tzTTx1XV1fcvXtX+/lI/09BnZtinS9FvX4KmZmZ+Oeff3DhwgX4+fkhJCQE79+/B/BhqBGpVIpmzZqhb9++sLW11SqPvDh8+DDCwsIwfvx4ne0zP+5j2tBF3cS6l6lTVI6n2NeC2PkxYBSZXC6HRCJBiRIl4OTkhB49eqBJkyZFJr8FCxagTp062Uayj4mJwZgxY/DgwYNsk6WbmppizZo1aN68ucb5+fj4YMqUKVnGtbSyssLUqVNz/ALMywTsAQEBmDp1Kp49e/bZJ3oSiQTVq1fHb7/9BplMpnFeaWlpGDp0KO7fv6/My8DAAFu3bkX58uXRv3//bIPASyQSVKlSBYcPH9Z4CAXFQNKfM3z4cNy7dw8+Pj5ZjoGm7fLEPjcBcc+Xol4/4ENP1smTJ+PFixefvR4MDQ3h5uam08AtN/r37w8fHx+N6if2fUxb2tRNQcx72ddwPMW+Fgri2mPAKDK5XA59fX3lrwCJRAIbGxv07NkTLi4uytH9v+T8unfvjhUrVmRZPmLECFy/fh3m5ubo1asXqlSpgtjYWJw7dw6+vr4wNTXFqVOnUK5cuVznFR4ejq5duyI+Ph6mpqaoUqWKcvgExXy9ixYtyjY3t7ZfkK9fv0aPHj2QkJCA6tWrw9nZGY6OjihXrhyMjY0hCAKSk5MRFhYGX19f/PPPP3j69CnMzMxw9OhRjWeB2L17N5YtW4Zy5cphwIAByMzMxF9//YVixYqhfv36OHPmDCZOnIjOnTvD1NQU9+/fx+LFi/Hq1StMmDABY8aM0Si/vAwnIZFI4Ofnp1Easc9Nsc+Xol6/N2/eoHv37oiPj4e9vT2aNm0KCwsLhISE4Ny5c0hMTMS0adNQpUoVXL16FcePH0dCQgL69euHhQsX6rLqamkTBIh5H8sLbQMcse9lRf14in0tFNi1J5CoZDKZMH36dMHb21uYN2+e0KhRI0EmkwlyuVyQy+XCgAEDhIMHDwrx8fFfdH4fu3//viCTyYQWLVoIYWFh2dJMnz5dkMlkwooVKzTKa9GiRYJMJhOmTZsmpKSkCIIgCO/fvxcOHz4sNGrUSJDL5YKbm5uQmpqaJd20adMEuVyuYc0EYcaMGYJMJhM2bdqU6zSbNm0SZDKZMGPGDI3z69Onj1CrVi0hJCREuSw4OFhwdHQUatasKWzfvj1bmqCgIKF27dpCz549Nc5PcV7IZDKt/rTJT8xzU+zzpajXb86cOYJMJhPWr1+fbV1cXJzQt29foX79+kJ4eLggCIIQEREh9OzZU5DL5cKVK1c0zi81NVWrv759+2pcPzHvY2LXTRDEv5cV9eMp9rUgdn4KDBhF9umFk5qaKpw5c0b44YcfBAcHB+UXSp06dYSpU6cKV69eFTIzM7/Y/ARBENatWyfI5XLhyJEjKtPExsYK9evXF1xcXDTKy8nJSWjevLnyy/FjL1++FJydnQW5XC4MGzZMSE5OVq7T9guyefPmGpdREATBxcVFaN68ucbp6tevLwwZMiTb8iFDhghyuVx48+aNynSDBg0S6tevr3F+Tk5OglwuFxYsWKA2iBk0aJBWx+9TYp+bYp8vRb1+rVu3FpycnHJc/+DBA0Emk2X5YRMcHCw4ODgIY8aM0Tg/RaCt6Z/iOGtCzPuY2HUTBPHvZUX9eIp9LYidnwJ7SRcwIyMjdOrUCVu2bIGnpydmzpwJqVSKlJQUnDp1CqNGjULr1q2xatUqPH/+/IvLD4ByzsumTZuqXG9ubg65XK4c2Di33rx5A0dHRxQrVizbOjs7O/z5559wcHDAjRs3MGLECCQlJWle+I+8e/dOq9eIlSpV0mpC+9TUVJQsWTLbcsWynAZ3LlmyJFJTUzXO78SJExg5ciQOHTqErl27it4TOr/PTbHP
l08VtfpFRUUpO2CpUqNGDQDIMiVchQoVULduXfj4+Gicn/B/8w5r+qcr+XUfA8Svm9j3MlWK0vEU+1oQOz8FBoyFSOnSpTFs2DAcP34c7u7uGDJkCEqXLo2IiAhs27YN3bp1+yLzU/TQzGn+TgAoU6aMxkMBFCtWLFt7rI9ZWFhgz549aNCgAe7du4ehQ4fm6WZXrlw5PHz4UKNypqenw8fHR6s2OKVKlVI5/2pgYCCAD43WVXn69CksLS01zs/IyAhTp07F33//DXNzc4wfPx4TJ07E27dvNd5XXuXHuSn2+aJOUaifubk5Xr58meN6xbpPe4hbW1tn66yVG9bW1pBIJLhx4wb8/f1z/VenTh2N81Ilv+5jgPh1E/tepkpROp5iXwti56fAgLGQksvl+Omnn+Dp6YktW7bAyclJ62mgxM4vKioKd+7cUf4pvsTCw8NzTBMXF6fxdGi2trZ48uSJ2m1MTEywY8cONGvWDD4+Phg8eDCio6M1ykfByckJb968wY8//oiIiIjPbh8ZGYkJEyYgPDwcTk5OGudXv359PHv2DAcPHlQu++uvv/Ds2TPY2dlhxYoV2Z4S7dixA0FBQXmaS9rBwQFHjhzB+PHj4eHhgS5duuDQoUNa7y+vdHVuin2+5NaXWr9GjRrh+fPn2L9/f7Z1aWlpWL58OSQSCerWrZtlXWRkpFZTH9aqVQsANO5cpS2x7mOA+HUT+14GFO3jKfa1IHZ+CpxLupDT19dH69at0bp16zzNASlmfjdu3MCNGzeyLb93757K3nXv37+Hn58fKlSooFE+tWrVwqFDh/D06VPlI3hVihcvji1btuDHH3+Ep6en1mOJjRkzBpcvX8bly5fRtm1bNGjQAA4ODrCxsUGJEiUgkUiQlJSk7Fl49+5dZGRkoEaNGhr3WAY+DF9z/vx5/Pzzz9iyZQsAIDQ0FObm5li/fj169+6Nzp07o1WrVjAxMYGPjw+8vb0hkUgwYMAAreqoYGBggHHjxsHJyQmzZ8/G/PnzceLECfzyyy952m9e5PXcFPt80dSXVj83Nzf8+++/WLRoES5cuJClp+bp06fx5s0bVKhQAR07dlSmycjIQEBAgFY98mvVqgUPDw88fPgQLVq00KrMmhDrPgaIXzex72VA0T6eYl8LYuenwIDxC6LJxO0FlV+jRo1yXBccHKxyuYeHB2JjY9GlSxeN8vr2229x8OBB7NmzB4sWLVK7rZGRETZs2IApU6bg/PnzWn1JGhsb488//8SyZcvg7u6OW7du4datWyr3JQgC9PX10bt3b8yaNUvjMREBoHbt2vjtt9+wcOFC5bh65cuXx/Lly1G9enUsWbIEM2fOxMGDByGRSCAIAiQSCcaOHauzscuqV6+Ov//+G7t27cLatWvh4uJSKAaA1+bcFPt8yYsvoX5yuRwrV67ETz/9hJs3b+LWrVvKdYIgwNbWFps3b84yaPjTp08hlUq1euXeqFEjyOVyJCQkaJSuT58+aNmypcZ55UTX9zFFfmLVDRD/XlbUj6fY14LY+SlwHEaRHTt2DBUrVkTDhg2LZH7a8PLyQkBAAJo0aQK5XJ7rdCkpKTh58iQMDAyyDQibk8zMTPzxxx+Ij4/P0yCm0dHRuHLlCvz9/REaGppldgQbGxvY29ujVatWOpmpID09HQEBATA0NETVqlWzvK4MCgpS/qIsU6YM2rdvr9Ex1MSrV68wd+5c3LlzBxKJJM8zvYh9bop9vhT1+ilERETg6NGjePToEZKTk2FpaYkmTZqgW7duKF68uFb7/BJpex8raGLeyzTxJR5Psa8FsfNjwEhEGlG8LhX7iTcRERUcBoxEREREpBbbMBZCGRkZiI2NhbGxsVbtRT6375cvX6qcZL5y5cpZ2jzkl/ysnyopKSmIiYmBtbV1vvY0F1twcDBu376NqKgoGBsbw8HBAfXq1cv3fIvq8QQ+vOK/desW4uLiYGtri5YtW8LU1DTf8svPayExMREB
AQF4/fo1EhMToaenB3Nzc9SoUQNVq1bVaV7qrFmzBs+fP8e6det0ut/CUj+xiH3fFPtaEFN+3zsLw/dsfihad/sv3OPHj7F69WrcvHlTOfZUlSpVMHjwYHz33Xd52rePjw+2b9+Oq1evIiUlReU2xYsXR8uWLTFy5Mg8DcuSk/yoX1paGv744w/cuHEDRkZGaNOmDfr27QuJRILg4GD8/PPPuHnzJgRBQPHixdGjRw9Mnz5dlBuuwvjx4+Hv749///1Xo3Te3t64ffs2nJ2dUblyZeXy5ORk/Pzzzzh16lS2wWZr1qyJ33//HXZ2dlqVVczjWbNmTTRu3Bg9evSAk5OTKJ1pfHx8sGTJEjx69Ajm5ubo06cPJk6cCD09PWzYsAGbNm1Szv0MfBgEfdGiRejQoYNOy5Gf1/q9e/ewefNmeHl5ZanLx8qWLYvvv/8ew4YNg5GRUZ7yy0157t69q9P9iVU/Ly8vnDp1CuHh4bC2tkaHDh3Qtm3bHLdXlGvPnj1a5/mp/DpXCuJaEOt4FsS9ExDve7Yg7p0AX0mLrk6dOujWrVu2Xow3btzA2LFjkZqamu1Elkgk6N+/PxYsWKBVnhs2bMD69euV+7WwsFA5yXxsbKwyv3HjxmnVCF7M+mVkZGDIkCHw9vZW7lMikaBv376YOXMmevbsiaCgIBgZGcHMzAxRUVGQSCRo1KgR9u7dq3HdtKXthPYTJkyAh4cHrl27BgsLC+VyNzc3XLlyBcCHMRMrVKiA2NhY+Pr6Ij4+HmXKlMGxY8dQunRpjfIT+3jK5XJlr8wSJUrAyckJPXr0QJMmTTTeV24EBwfDxcUly9iVEokEI0eOhIODAyZNmoQSJUqgTp06MDMzg6+vL0JDQ2FoaIjDhw9DJpNplF9huNY/ZmBgAEdHRwQFBSE6OhoSiQRVq1bFjh07tBqM+e+//87Vdjt37kRQUBAWLlyYpVz9+/fXOE8x6/f7779j27ZtAJDlemjSpAlWrFiBMmXKZEszffp0nDp1SuNrXexzRexrARD3eIp97wTE/Z4V+96ppOWUgqQlVXNqpqSkCM2bNxdkMpkwceJE4cmTJ0JaWprw9u1bYc+ePULdunUFuVwuXL9+XeP8zp07J8hkMqFhw4bC1q1bhZCQkBy3DQ0NFbZt2yY0bNhQkMvlwrlz5wp1/fbt2yfIZDKhU6dOwvHjx4VTp04JLi4uglwuF5YsWSLUrVtXOHHihJCRkSEIgiAEBAQI3bp1E+RyuXDy5EmN66atfv36aTU/aZs2bYTevXtnWXbz5k1BJpMJbdq0EXx9fbOse/funTB58mRBJpMJixcv1jg/sY+nTCYTOnbsKDg5OQkymUw5j2ubNm2E//3vf8LLly813qc6c+fOFWQymTB79mwhIiJCiIiIEGbNmiXUq1dP6Nmzp9C5c+cs10d6erqwcOFCQSaTCbNmzdI4P7Gv9YsXLwoymUxo0qSJsHv3buHJkydCeHi44O3tLcyaNUuQy+XCTz/9JAiCIDx69EiYOHGi8jNQNf90buon5ny9Ytbv6tWrgkwmE2rWrCnMnTtX2L9/v7B06VKhcePGgkwmE1q1aiU8e/YsWzpdzTsuCPl7roh9LYh9PMW+dxbE96yY904FBowiU3VjOHXqlCCTyYRhw4apTHPs2DFBJpMJkyZN0ji/AQMGCLVq1RKePn2a6zQBAQGCo6OjMGDAAI3zE7N+3333neDo6CiEhYUpl0VFRQm1a9cW5HK5sGXLlmxp/Pz8BJlMJri5uWmUlyAIwl9//aXVX7t27bS66dWuXVv48ccfsyxbtWqVIJfLBU9PT5VpUlNThZYtWwrt27fXOD+xj+fH58r9+/eF+fPnK79AFMHF999/Lxw8eFCIj4/XeP+fat++vdC6dWshPT1duSw9PV1o1aqVIJfLhWvXrmVLk5aWJrRq1Upo06aNxvmJfa0PGTJEcHR0FB49eqRy/W+//SbI5XLhzp07ymWrVq0SZDKZ
sHXrVo3zk8lkgr29vTBu3Dhh1qxZOf41b95ckMvl2ZYX5vqNHj1akMvlwvnz57Msj46OFtzc3ASZTCY0bdo0W1l0GTDm57ki9rUg9vEU+95ZkN+zYtw7FdiGsRB4+PChcsBlVXr06IG1a9fiv//+03jf/v7+aNq0qdqJyj9Vo0YNNGvWDPfu3dM4P1Xyq37Pnj1D3bp1UbZsWeWyUqVKoW7durh9+zY6deqULY29vT1kMplWU0b9/PPPWg1wLPzfgNqaKl68OJKTk7Msi4qKAgA0aNBAZRojIyM4ODjg+vXrGucn9vH8WN26dVG3bl3MmTMHly5dwrFjx3Dt2jV4e3vj/v37WLx4Mdq1a4cePXqgRYsWWh3P8PBwtGjRIktHHQMDAzg4OCAiIkLlPLKGhoawt7dXOUOFNvLzWvfz80OtWrXg4OCgcv3333+Pbdu2wcPDQzk25IQJE3D48GGcPXsWo0aN0ii/cePGYcuWLXjw4AHmzZuXZVaJj7m6uiIqKgpLly7VrEKfELN+Pj4+kEql2drrWVpaYvPmzVixYgV27tyJYcOGYevWrdmmYNOF/DxXxL4WxD6eYt87C/J7Vox7pwIDxkLg3bt3AKC2XUiNGjXg5eUlVpF0Kr/ql5ycrLKtiWIy+/Lly6tMZ2trixcvXmiUF/Bh6rbMzEx0794denq5n4b98uXLynYrmqhWrRr+++8/JCcnKxs1K+obGRkJExMTlenUrVNH7OOpipGREZycnODk5ITo6GicOHEC7u7u8Pf3x+nTp3HmzBlYWVnBxcUF06ZN02jfenp6yMjIyLZcsSwlJUVlL9DU1FSNPm918vNaT01NVTs2pqJuijIAH4KE2rVr486dOxrn9+OPPyqnjpw4cSLatWuH+fPnq2yLpgti1i8uLk7tgOszZsyAhYUFfv/9dwwfPhybN29G48aNNcrjc/LzXBH7WhD7eIp97ywM8vPeqaCbuyDlSW4a2BYrVkw5WbsmpFIpbt68iefPn+c6zbNnz+Dl5aVVw2ZV8qt+5ubmiIuLy7Zc8Qsqp/0ZGBhoNRxMtWrVAHxoOL106dJc/1WqVEnjvACga9euiI+Px5IlS5TLnJycAABbt25VmebGjRvw9fVV+YTgc8Q+np9TqlQpDB06FO7u7jh+/DiGDh2K0qVL4+3bt9ixY4fG+6tYsSJ8fHyyPHlISkqCj48PACgbw38sOjoaDx8+hK2trfYV+Uh+XusVKlSAj49PjtOhXbt2DQCyBXSGhobIzMzUOD/gw/3l4MGDmDJlCq5evYrOnTvjr7/+0mpfnyNm/aysrLIEnv+vvTOPqyn94/jnljZKYcJVjWbQvVGJQTXWSCOGpDKaKVmzjGbDWIfs2zCWsdTMLyJ7DaNBY2gGo4WQKalfIUZapEUqpTq/P3rd+9N0S+dWzzWn7/v16vXinPOcz/N9lu/53nOeRRE+Pj5YunQpSkpKqk2maCyasq2w7gusy5O173wTnrOv0ti+Uwa9YVQBcXFxWLRokfz/Dx48AFA1c6179+4K02RkZCi1NdPEiRPx5ZdfwsPDAzNnzoSTkxPEYrHCazMzM3H27Fns2bMH5eXl8PLy4q0HsLOvQ4cO8j2WX+WTTz7BoEGDak33+PFj+VszPlhaWiIlJQW3b99mss7bhAkTcPLkSYSEhCAtLQ3Tpk2Dra0tfHx8EBAQgIyMDLi5ucHIyAj5+fmIjo7G4cOHAYD350WAfXnyQSKRYOHChfj6669x+fJlnDx5kvc9hg4dCn9/f8yZMwczZ84EAOzevRsFBQVwdXXFxo0boa+vj6FDh8qXEVq8eDGKioqU3pubZV93dHTE7t278fnnn2PlypXVHuwXL17EqlWrIBKJYG9vXy1deno6DA0NeevJUFNTw/Tp0zF8+HAsWbIEfn5+OHXqFFatWiX/kdUYsLTPxMQE8fHxqKioqDMg8/T0hJaWFpYvX445c+bU6lvrA8u2wrovsC5P1r5TFc/Z+tIY
vlMGBYwq4OHDh3j48GGN4xcuXFDoGJ4+fYo7d+7A1taWt5aTkxOSk5OxZ88ebNq0CZs2bYKBgQHEYjF0dHQgEolQXFyMzMxM5OXlAagaczdjxgyFY9bqAyv7evTogZCQEGRlZVUbd9erV69aF2HNyclBUlJSnWt/1YaFhQVCQ0ORkJDAawN3TsmVq9TU1PDjjz/i008/xbVr1xAbGwt1dXWIxWKoq6sjKiqq2ucojuPQokULrFixotZxOnXBujyVQU1NDYMHD8bgwYN5p502bRpOnz6NK1euyMdhcRyHfv36YdmyZbh16xbmzJkDTU1NaGtr49mzZ+A4Dvr6+pg6dapS+WXZ16dOnYqzZ8/iypUr+OCDD2BkZAQDAwOkp6fj6dOn4DgOzs7OsLS0lKfJyclBcnKy0n39VUxNTXHw4EEEBwdjy5YtcHFxgY+Pj3ztwIbC0j5bW1vExsYiIiLitesOuru7Q1NTE4sXL1ZY1/WFZVth3RdYlydr36mK5yxfGuI7ZVDAyJi6Bn7X9is4NDQU5eXlSg8E/uKLLzBw4ED8+OOPiIyMRF5enrzRvoqWlhb69++PKVOm1DnepC5Y2jdq1CioqakhPz+/WoBTF0eOHEF5eblS61XZ29ujrKwMxsbGvNLt3LkTZWVlvPWAqsVyDxw4gJMnT+Lw4cOIj4/H33//XeO6tm3bwsHBAVOnTlV64VnW5ckaPT09HD16FDt37sS1a9egoaGBAQMGwMfHB5qamggICMCCBQtw9epVlJaWAqhaIHf16tX1Lo9XYd3XdXV1cejQIXzzzTe4cOECHjx4IH9Lpa2tDU9PT3zxxRfV0pSVlWH16tW1TiRRBk9PT9jb22PZsmX4/vvvAaBBA+1lsLTP3t4eO3bswO7du+u1ULWzszN0dHTw1Vdf1bqYeF2wbius+wLr8gTY+k6A7XNWVdDC3f8CSkpKUF5eDm1t7QZvKVRWVob79+/j8ePH1bYsEovFePfdd5t81wdFNKZ99dVq2bKlUmN/VE1xcTFSU1NRUFCAyspK6OrqwtjYWCkn3hj828tTEVlZWcjIyIChoWGjjV2sL43VF7KyspCQkICSkhK0bdsWVlZWKtnWLTQ0FBEREQCqfjg1FizskwUqfNp1bm4uSkpKmLQbFn6zMfuCqsuTpe98E5+zjQEFjARBEARBEESd0CxpgiAIgiAIgVFSUlLrqgLKQG8Y3zCysrIQFRWF7OxstGrVChYWFkpN85fx888/o0+fPsw/rd27dw/x8fFo06YNbG1t5a/gi4uLERgYiMjISBQUFMDIyAgjR47E2LFjG6RXXl6O/Px86Ovrv/bzTH5+PoqLi2tdV1BZGrvuXoWlfazrThG5ubk4ePAgYmJikJOTg5YtW8LCwgIeHh4wNzdv8P0LCgpw8eJFJCUlIT09vdpno06dOsHc3ByDBw+Gvr6+0hqq6nuKaOryfPLkCTQ1NWuU16NHj3D58mXk5uaic+fOsLe3b/A6d0L0LapsK6x9p5D13gTf+SpeXl64fv16gzdWkEEBI2POnDkDY2NjWFlZ1Tj37bffYt++fTUG+fbs2RPfffedUksMSKVSqKmpoW/fvnBxcYGjoyNatmypdP7rw6ZNmxAYGCj/v7GxMfbu3QsDAwN4enoiOTm52sxhkUiEDz74AFu3buWtlZ+fj7Vr1+LcuXMoLS2FhoYGBg8ejC+//LLWpW/mz5+P06dP8+5ErOsOYGsfwLbu/vOf/+DYsWPYtGlTtTKNi4vDrFmzkJ+fX2OGubq6OhYuXKj0UhRFRUX49ttvERISgvLy8lpnsItEIqirq8PNzQ3z5s1Tanwc676nivK8desWlixZIl9/rmfPnti0aRNMTExw/PhxrFq1qtos6Xbt2mHr1q1KD/YXqm9RhZ9m7VuErseybdYXLy8vxMbG4s6dO41yPwoYGSOVSjFmzBhs3Lix2vH169cjKCgIHMehe/fueOedd1BQUIDY2Fi8ePECpqam
OHnyJLS1tXnryRCJRNDW1saIESPg7Oys1HIMr+O3336Dr68vWrRoIZ85GxMTg169eqF3797w9/fHkCFD4OTkBD09Pdy8eRP79+9HWVkZ1q1bx+sXV2lpKdzc3JCamlrjQaijo4Ply5crvN/8+fPxyy+/8O5ErOuOtX0s6w6o2sotLS0NV65cke8eUVxcDEdHR+Tk5MDc3Bxubm4wNjZGfn4+oqKicOrUKQBAUFAQ+vbty0uvpKQEHh4eSE5Ohrq6Onr37o0ePXqgY8eO8oezbOmLxMREXL9+HRUVFTAzM8ORI0fkO0bUF9Z9j3V5pqenY/To0SguLoaWlhbU1NRQUlKCd999F9u2bYOLiwsMDAwwaNAgtG3bFtHR0UhISIC+vj7OnDlTr4WpX0XovkUGi7bC2j6h67H2nRMmTKjXdampqSgqKqrxpUvpxfUbbVdqol4o2mT+4cOHnLm5OWdlZcX98ccf1c5lZ2dz48aN46RSKbd3716l9KZNm8bt2bOHGzFiBCeRSOQblNvb23Nbt27l0tLSGmJSNaZMmcJJpVLu8uXL8mMXL17kpFIp17dvX27x4sU10kRGRnISiYTz9vbmpRUQEMBJJBJuzJgx3PXr17mSkhIuKSmJ+/rrr+U2+vv710in7Ib2rOuOtX0s647jOM7Ozo7z8vKqdiwkJISTSCScr6+vwjTh4eGcRCLhZsyYwVtv8+bNnEQi4Xx8fLjMzMzXXp+dnc3NmDGDk0ql3ObNm3nrse57rMtzxYoVnEQi4davX8+9fPmSq6io4LZt28ZJJBJu7Nix3OjRo7nc3NxqaVatWsVJJBJu+/btvPWE7ltYthXW9gldj7XvlNkgayd8/pSxTwYFjIxRFHTs3buXk0gk3LZt2xSmSUlJ4Xr06MF9/PHHDda7desWt3z5cq5fv37yxiOVSrkJEyZwx44d4woLC3lrvIqtrS3n5uZW47irqysnlUq5u3fvKkw3btw4rl+/fry0XF1dOWtray4jI6PGuTNnznDW1tacVCrlvvvuu2rnGjNgbMq6Y20fy7rjOI6zsLDgvvjii2rHVq5cyUmlUi41NbXWdC4uLpyNjQ1vveHDh3NDhgzhysrK6p2mrKyMGzx4MDd8+HDeeqz7HuvydHR05AYMGMC9fPlSfqyiooIbMmQIJ5VKuYiIiBppnj17xvXu3VthO3sdzcm3NHVbYW2f0PVU4TvNzc05Pz8/LiYmptY/Z2dnTiqV1jiuLDRL+g0gLS0NIpEIo0aNUni+a9euMDMzQ2pqaoO1rKys4Ofnhz///BPbt2/H4MGDoa6ujps3b2LZsmXo378/5s6di0uXLim1Q0lhYaHCAcOywdy1Deo2MjKSTzyoL/fu3YOVlRU6duxY45yTkxOCgoLQunVr+Pv7Y/369bzuXV+asu5Y28ey7oCqBXP/uRWhbLxbXWM+O3bsiOLiYt56mZmZsLS05LVmnYaGBqysrJCZmclb7580dd9TRXmam5tX20dcTU1Nvjeuoglfenp6kEgk8gW3+dCcfEtTtxXW9gldj7Xv/Pnnn9GzZ08cOXIEe/fuRefOndGvX78af3p6egBQ47iyUMD4BiCbKGFiYlLrNSYmJko1rNrQ0NCAo6Mj9uzZg0uXLmHRokWQSqUoLS3F6dOnMWPGDKW2EGrVqhWePHlS47jsmKK9imXH+c6e5DgOBgYGtZ63srJCcHAw2rVrh6CgIPj5+fG6f31oyrpjbR/LugOAPn364Pbt29WC6a5duwIA4uPjFaapqKjAnTt3lFpst3Xr1kptNfbw4UO0bt2ad7raaKq+x7o81dTUFI7LlT2katvjuH379koFqM3NtwBN11ZY2yd0Pda+891338WhQ4ewaNEiREdHY+TIkTh48CDv+/CFAsY3ANl2RIq2EZJRWlra4OUoaqNt27bw9vbGiRMnEBYWhsmTJ6Ndu3YKO8DrkEqluHXrVrVZZgkJCYiLi0OrVq2wb9++GmmuXr2K27dv
w8zMjJeWWCxWuNXTq3Tr1g3BwcHo0KEDjh49ikWLFqGyspKXTl00Zd2xto9l3QHAxIkTUVlZiU8//RT37t0DAIwZMwYGBgZYvnw5Hj16VO36srIyrFy5EpmZmRg2bBhvPTs7OyQnJ8Pf37/eafz9/ZGcnIz333+ft159aMy+x7o827Rpg+zs7BrH1dTU5JNuFPHixQt5UMmH5uZb/kljthXW9gldj7XvBKomR02cOBGnTp2CpaUlVq1ahQkTJjTKl8jaoL2kVUBYWBjCwsJqHE9OTq71l35KSkqte4o2Jt26dcOCBQswf/58/Pnnn7zTf/TRR4iJiYGnpyc++OADAMCvv/4KkUiEdevWwdfXF7m5uRgxYgRatWqFv/76C/v27YNIJMKHH37IO6/nz59HTk4O3nrrrVqvMzU1RXBwMCZNmoSTJ082aAs7lnXH2j6WdQdUfbL09fXF9u3b4ezsjGHDhqF///6YPHkytm/fDicnJ9jY2MDIyAgFBQW4efMmsrKyIBaLMWvWLN56n332GS5cuICtW7fil19+wYgRI2BhYaFwlvTt27cRHh6OlJQUtGzZEr6+vrz1+NLQvse6PDt37qxw+ZENGzZgw4YNtaZLSUlR+KnwdQjdt/DNX0PaCmv7hK7H2ne+iomJCfbt24fjx49j48aNcHFxwfTp0zFz5swG3VcRFDCqgNrGnJw+fRqDBg2qcfzWrVtIT0+Hs7NzU2dNjpqamsK8vI6RI0ciNjYWhw4dwokTJ+T3WrhwIYYPHw5vb28EBQXhwoUL8jQcx6F///4YP348L61Bgwbh119/xbFjxzB79uw6rzU2NsbBgwfh7e0tH3eoDCzrjrV9LOtOxuzZs9GhQwesX78e4eHh+PXXX6vd+8qVK/J/A1VjcTZs2KDUgtomJiYIDg7G3LlzkZKS8tpf4hzHwdTUFJs3b65zyEFjo2zfA9iWp6WlJaKiopCQkAALC4t6pblx4wbS09OV+owqdN+iDMq2Fdb2CV1PFb7zn7i7u2PQoEFYvnw5du3ahbNnz6KsrKxR7i2D1mH8FxASEoLff/8drq6uGDp0KK+0ixYtQu/eveHu7t5EuVNMfHw8YmNjoaGhATs7O3Tp0kV+7sSJEzh16pR8U3tHR0dMmDCB12QEAHj69CnWr18PfX19LF26tF5pcnNzsXTpUhQWFuLAgQO89JShIXWnKvtY1N0/KSoqQlhYGKKjo5GSkoKCggJUVlZCV1cXRkZG6N69OxwcHNCrV68G6QBAZWUlIiIicOHCBSQnJyvc6UUqlWLYsGEYOnRonZ9X60JVfQ9gU565ubn4+++/8fbbb6NNmzb1SnP06FFcunQJkyZN4r3uowwh+hbWbYW1fULXk6EK36mIsLAwrF27Fnl5eRCJRLRwN0EQBEEQBFGT3Nxc3LhxAwDg4ODQKPekgJEgCIIgCIKoE5ol/YZQUlKCnJwc5OTkoKSkhLn+uXPnsH//fua6TYHQy1LV9gkd1n1BaHrNqX0KrSxVXXdCeg69CTR2edIbRhVRUlKC0NBQXLhwAUlJScjPz692Xl9fH+bm5nBwcICLi0uTb0Tf2JuU82XXrl149OgR1q5dyzut0MvyTbPvnzSk7gCgoKAAf/zxB7KysmBoaIhBgwbVuc/w+fPnkZSUhDlz5iib5Tph3Rf+7Xqs2+dff/2F6OhoaGpqYuDAgfJxYmVlZQgMDERERARyc3PRuXNnuLm5wcnJqUF6dfFvL8s3zbf8m59DwJvVNoHGL08KGFVAVFQU5s2bh9zc3Neu0i8SidC2bVts3LgR/fv35611//79el339ddfIyEhAWfPnq2Wp3feeYe3pjJ89NFH+Ouvv3g3bKGXJUv7lEXZugOAiIgILFy4EIWFhfJjWlpamD59OmbPnq1wxuL8+fPxyy+/8NZjXX9C1wPYt8+NGzdi79698v+rq6vDz88Prq6u8PHxweXLl2toTpo0CQsWLOCl0xzKUui+Uxka4stY
tU1AdeVJASNj7ty5g/Hjx+Ply5cYOHAgRo0ahR49etS6FtyZM2dw6dIlaGho4NixYzA3N+elJ5VKlV7mQSQSKVxnrSlQpqMKvSxZ26csyjrZu3fvwsXFBWVlZTA1NYVUKsXDhw+RmJgIkUgEOzs77Nixo8ai58oGjKzrT+h6rNvnH3/8gZkzZ0JHRwejRo2Cmpoazp49i9LSUnzzzTf45ptv4O7uDmdnZ7Rp0wZRUVHYtm0bnj9/jgMHDqBPnz711hJ6WQrddyqLsr6MZdsEVFeetA4jY3bt2oXy8nJs3LgRY8aMUXiNnp4e9PT00K1bN4wdOxZhYWGYP38+du3ahR07dvDWFIlEr10oNycnBy9fvqxzz9n6UN9fPv+ktLSUdxqhlyVr+1jWHQAEBgairKwMU6dOxbx58+QOMCoqCkuWLEFkZCQmTZqEH3/8Ual1AhXBsv6Erse6fR45cgQikQg//PCD/AE7fvx4uLu7Y+3atXBzc8OqVavk13fp0gUdO3bEnDlzEBISwvuhLOSyFLrvZO3LWLdNgL1vAShgZE5sbCx69uxZaydVxOjRo3Hw4EHExsby1uvXrx+uXr2Krl27ws/PT+EG6cD/xzpERETw1ngVJycnpX75cBzHO53Qy5K1fSzrDgCio6PRqVMnzJ07t1p6Ozs7hIaGwtfXF7GxsZg4cSL27t1b697E9YV1/Qldj3X7jI+Ph7m5ebWHq4WFBSwtLREfHw9PT88aaRwcHGBkZISbN2/y0hJ6WQrdd7L2ZSzbJsC+PGXQLGnGFBcXo3379rzTGRoaori4mHe6/fv3w8/PDzdu3MCHH36IAwcOvHa8SmMgFot5/SmzeKnQy5K1fTJY1B0AZGdnw9zcXOHC2G3atEFgYCCGDBmC5ORkeHp6Kty3mA+s60/oeqzbZ0FBAYyNjWsclz0sTU1NFaYzNTXl3XaEXpZC950yWPkylm0TUF150htGxpiYmODatWt4/vw5dHV165Xm+fPniI2NVXp7sgkTJmDIkCFYtmwZ1qxZg7CwMKxevVrpTc/rwsjICI8fP8bhw4dr3VtZEbKxI3wQelmyto9l3QGArq4uKioqaj2vqamJnTt3Yu7cuQgPD4enpyf27dvHW+dVWNaf0PVYt89WrVrhxYsXNY7LHvLa2toK07Vu3RqVlZW89YRclkL3nax9Geu2CbD3LQC9YWTOmDFjkJeXh8mTJyMhIeG119++fRuTJ09Gfn4+r88H/6Rjx44ICAjA+vXr8eDBA4wbNw7fffddo+81aWlpCaAq302N0MuStX0s6w6ocuqvG3ytrq6OLVu2wNnZGQ8fPoSnpycePXrUIF1W9Sd0Pdbts127dsjKyqpx3MbGBq6urrWmy8nJUXo4g1DLUui+k7UvU0XbBNj7FnrDyJgpU6bgypUriImJgbu7O4yMjNCjRw+IxWLo6OhAJBJVm5326NEjcBwHGxsbTJkypcH6Y8eOxcCBA7FixQr4+/sjPDwcK1eubATLqrCwsEB4eDgSEhJ47Z2szOt0oZcla/tY1h0A9OrVC8HBwYiLi4O1tXWt16mpqWHDhg3Q0tLCsWPHkJGRoZTeP2nq+hO6Huv2KZVKce7cuRpvxdzc3ODm5qYwTWlpKRITE2FlZaW0nYDwylLovpO1L1Nl2wTY+RZaVkcFlJeX44cffkBQUFC1hVJlg21frRJ9fX14e3tj+vTpjb5J+blz57By5Uo8ffoUWlpaKC0tbfACn4mJidi5cyd69eqFadOm1TvdxYsXkZubCxcXF156Qi5LgK19rOsuMjISU6ZMgaOjI7Zv316vNOvWrUNQUBBEIlGjLu7bVPUndD2W7fP48eMIDAzE6tWr8d5779UrTUhICJYuXYrPP/8cs2bN4q2pCCGUpSr0akMIz6E3pW0CTetbKGBUIRUVFbhx4waSkpLw+PFjFBUVAagaDyEWi2Fubo7evXtDXV29yfJQUFCAtWvXIiIiAiKRCFevXm0yraZE
6GX5JtjX2JSXl+PatWsAqmZG15fz58+jsLCQt1N/Haz7gpD03tT2GR8fj6ysLPTs2ROGhoaNdl8hleWbUHdCeQ7xoanaJtB05UkBI0EQBEEQBFEnNOlFBUybNg0ODg64cePGa6+9fv06HBwcMHPmTNJTsRbpNb7e9OnTBW0f6f179YRsG+mRnjJQwMiYS5cu4c8//4StrS169+792uvfe+892Nra4uLFi4iKiiI9FWmRXtPoXb58WdD2kd6/U0/ItpEe6SkLBYyMOXPmDEQiEa9I/9NPPwUAhIWFkZ6KtEiP9Eiv+egJ2TbSIz1loYCRMTdv3kS3bt0UrgpfG2KxGBKJpF6vnpuTnpBtIz3SIz3V6QnZNtIjPWWhgJEx2dnZ6Ny5M+90JiYmChcGbc56QraN9EiP9FSnJ2TbSI/0lIUCRsZUVlYqvTioMumErCdk20iP9EhPdXpCto30SE9ZKGBkTLt27fDw4UPe6R48eKDUFkJC1hOybaRHeqSnOj0h20Z6pKcsFDAyxsLCAikpKUhLS6t3mrS0NKSkpMDCwoL0VKRFeqRHes1HT8i2kR7pKQsFjIwZMWIEOI6Dn58fysvLX3t9RUUFVqxYIU9LeqrRIj3SI73moydk20iP9JSGI5gzbtw4TiqVcp6enlxKSkqt16WmpnJeXl6cVCrlxo0bR3oq1iI90iO95qMnZNtIj/SUgbYGVAGZmZnw8PBARkYGRCIRpFIpLCws5GMLcnNzkZCQgKSkJHAch44dO+Lw4cMQi8Wk14xsIz3SIz3V6QnZNtIjPWWggFFF5OXlYfny5fjtt9/AcRxEIlG187Jjw4cPh5+fX4MGqgpdT8i2kR7pkZ7q9IRsG+mRHl8oYFQxDx48wO+//47ExETk5eUBANq0aYPu3btjyJAhMDU1Jb03UIv0SI/0mo+ekG0jPdKrLxQwEgRBEARBEHVCs6QJgiAIgiCIOqGAkSAIgiAIgqgTChgJgiBeYejQoZBIJIiJiVF1VgiCIN4YWqg6AwRBEI2Bl5cXrl69Wu2YpqYm9PT08NZbb6F79+6wsbHBiBEjoKOjo6JcNg9iYmIwceJEAEBycnK1czt27MD3339f7ViLFi2gq6sLAwMDmJmZwdraGqNHj0b79u2Z5ZkgiLqhgJEgCEEhFovla41VVFSgsLAQaWlpSE5OxokTJ7B69WrMmzcPHh4eCtObmJhAU1OTgsomRldXF2ZmZgCqlv8oKipCVlYWzp07h3PnzmHLli2YMGEC5s2bR3VBEG8AFDASBCEoXF1d4evrW+3Yy5cvERcXh8DAQERERMDPzw/37t3DkiVLaqQPCgpildVmTffu3XHgwIFqxziOw927d3Hs2DEcPHgQwcHBSEhIwP79+6GlpaWinBIEAdAYRoIgmgEaGhro27cvdu/eja+++goAsH//fpw7d07FOSNeRSQSoWvXrli8eDECAwOhoaGBuLg4bNq0SdVZI4hmD71hJAiiWTFjxgxER0cjMjISO3fuhKOjY7XzQ4cORXp6Ovbv3w8bG5tq56KiohAcHIxbt24hLy8P2traaNOmDSQSCezt7eHm5lZDr7S0FIcPH0Z4eDju3r2LkpIStG/fHgMGDMD06dNhYmJSI01mZiZ+++03XLp0CWlpacjOzoaamhqMjY0xZMgQTJ48udZdG1jlsamxsbHB7NmzsW3bNhw9ehQzZsyAoaEh83wQBFEFvWEkCKLZ4eXlBQBISkrC48eP65Xm+PHjmDRpEs6fP4/S0lJ07doVRkZGePbsGc6fP4/t27fXSPP48WOMGzcO69atQ1xcHFq1aoUuXbogNzcXR48ehbOzs8LZ2EFBQVi9ejViYmJQUVGBbt26wdDQEPfv30dAQABcXFyQnp6u0jyy4JNPPoG6ujrKyspw5coVleSBIIgq6A0jQRDNjj59+kAkEoHjOMTFxaFTp051Xl9RUYFvv/0WALB06VJ4eHigRYv/u8+7d+/WCGjKysowa9Ys
pKamYuDAgVi2bBnefvtt+bkdO3YgICAAn3/+OcLDw2FgYCBPO2jQIAwePBh9+vSppvP06VNs3rwZoaGh8PPzww8//KCyPLJAX18fZmZmuHPnDm7evImxY8cy1ScI4v/QG0aCIJodrVu3hq6uLgAgJyfntdfn5uYiPz8frVu3hpeXV7VADAC6dOkiX0ZGxs8//4ykpCR069YN33//vTwQA6qW+5k7dy7s7e2Rl5eH48ePV0trZ2cHW1vbGjrt2rXDmjVr0KFDB1y+fLla3lnnkRWyYP7p06cq0ScIogoKGAmCaJa0bNkSAFBUVPTaa9u1awdtbW08e/YMFy9erNf9z5w5AwBwcXGBtra2wmtk4yejo6NrnCsqKkJoaCgWL16MqVOn4uOPP4aHhwc+/vhjFBUVgeM4JCYmqjSPLOBTTwRBNB30SZogiGaJLACRvWmsCzU1NUyZMgW7du2Cj48PzMzMYGdnB2tra/Tt21fhZIykpCQAwE8//YTz588rvG9hYSEAICMjo9rxmzdvwtfXF0+ePKkzX/n5+SrLIyv41BNBEE0HBYwEQTQ78vPz8fz5cwBVb+bqw2effYZOnTohODgYSUlJ+O9//4ugoCCIRCLY2dlhwYIFkEql8uufPXsGAEhNTX3tvV+8eCH/9/PnzzFnzhzk5OTAxsYGPj4+kEgk0NfXh6amJoCqySCxsbEoLy9XSR5ZIpvcU996IgiiaaCAkSCIZkdsbKz837169apXGpFIBHd3d7i7uyM3Nxc3btzA1atXcebMGURGRsLb2xunTp1Chw4dAFR9Sn327Bn27dsHOzu7euft0qVLyMnJgVgsRkBAgMJPxa++WVRFHlmRl5cnD2atra1VmxmCaObQGEaCIJodsh1GunfvLt9GkA9t27aFg4MDFi9ejPDwcBgbGyM/Px+nT5+WXyPb9u769eu87v33338DACwtLWsNFu/fv6/SPLLi0KFDqKiogJaWFgYMGKDq7BBEs4YCRoIgmhX+/v7yCRyzZ89u8P10dXUhkUgAAFlZWfLjTk5OAIAjR44gLy+v3veTBYnZ2dkKz+/duxcVFRUqzSMLYmJisHv3bgDARx99hLfeekvFOSKI5g0FjARBCJ7y8nLExsZi1qxZ2LJlCwDA29sbw4cPr1f61NRULF68GLGxsaisrKx27sqVK4iKigJQ9VZQhru7O8zMzPDkyRN4e3vj1q1bNe579+5dbNu2DREREfJjst1l4uLicOjQIXAcJ7dh3759CAgIULivMss8NiV3797F2rVrMWXKFLx8+RLW1taYN28eE22CIGqHxjASBCEoQkNDERkZCQCorKxEYWEh0tPT5ZM29PT0MHfuXHh4eNT7ni9fvkRoaChCQ0Oho6ODt99+G5qamsjKypK/CRw2bBhGjhwpT6OlpYWAgADMnj0biYmJGD9+PAwNDSEWi1FeXo709HQUFBQAANatWydPJ5VKMW7cOPz0009YsWIFdu3ahQ4dOuDRo0fIz8/H+PHjkZaWhqtXr6osj41BYmKivA44jkNRURGysrLkei1atICnpyfmzZunMEAmCIItFDASBCEoMjIy5EvAaGhoQE9PD507d4a5uTlsbW0xYsQI6Ojo8Lqnqakp1qxZg6ioKCQmJiIzMxNFRUXQ09PD+++/D2dnZ4wZMwZqatU/2ojFYhw9ehSnTp3C2bNnkZiYiDt37kBPTw+dOnWCg4MDhg0bVmN83po1a2BmZoaQkBA8ePAAL168QNeuXeHu7g5XV1f51oaqzGNDef78OW7cuAGgKjhs1aoVDAwMYGNjA2tra4wePRrt27dvVE2CIJRHxMm+dxAEQRAEQRCEAmgMI0EQBEEQBFEnFDASBEEQBEEQdUIBI0EQBEEQBFEnFDASBEEQBEEQdUIBI0EQBEEQBFEnFDASBEEQBEEQdUIBI0EQBEEQBFEnFDASBEEQBEEQdUIBI0EQBEEQBFEnFDASBEEQBEEQdUIBI0EQBEEQBFEn
/wMthfFx3uvjDQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 720x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Tally how often each 'Disease_ID' occurs in nan_df (value_counts sorts by descending count)\n",
    "counts = nan_df['Disease_ID'].value_counts()\n",
    "\n",
    "# Keep only the first 20 entries, i.e. the most frequent diseases\n",
    "top_20_diseases = counts.head(20)\n",
    "\n",
    "# Bar chart of those frequencies with vertical tick labels\n",
    "plt.figure(figsize=(10, 6))\n",
    "top_20_diseases.plot.bar(rot=90)\n",
    "plt.gca().set(\n",
    "    title='Distribution of Diseases with NaN Class',\n",
    "    xlabel='Disease ID',\n",
    "    ylabel='Frequency',\n",
    ")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "9978bb49-58eb-4e03-a574-e65dee8f5814",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12902"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows currently held in nan_df\n",
    "len(nan_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a336a30b-f16e-4c04-aa90-3b8c67f2d609",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from tdc.multi_pred import GDA\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Read disgenet_updated.csv and select every row recorded for one disease ID.\n",
    "merged_df = pd.read_csv('/nfs/dpa_pretrain/data/pretrain/disgenet_updated.csv')\n",
    "desired_disease_id = 'C0002395'\n",
    "disease_row = merged_df.loc[merged_df['diseaseId'].eq(desired_disease_id)]\n",
    "\n",
    "# Extract the disease name (left disabled)\n",
    "# NOTE(review): 'NofGenes' does not look like a name column - confirm the intended column before re-enabling.\n",
    "# disease_name = disease_row['NofGenes'].values[0]  # If there could be multiple rows, this will get the disease name from the first row\n",
    "\n",
    "# print(f\"The disease name for Disease_ID {desired_disease_id} is {disease_name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "329f7469-a4f2-4058-a0f9-8f4b57f52ec0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "191"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows selected into disease_row\n",
    "len(disease_row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5b8e5ad7-abee-44ab-8f53-e94c00d2a463",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Subset of disease_row whose 'proteinSeq' entry is missing\n",
    "nan_df = disease_row.loc[lambda frame: frame['proteinSeq'].isna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1d1ea96f-8cf9-436b-950d-0522d3dace10",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>proteinSeq</th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>AMD1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>AQP4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>GPX1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>703</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>MDH1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>747</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>MPZ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>979</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>OPRK1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1110</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>OPRL1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1181</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>MAPK10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1263</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>VDR</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4175</th>\n",
       "      <td>C0002395</td>\n",
       "      <td>CCL4L1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     diseaseId geneSymbol proteinSeq diseaseDes  score\n",
       "16    C0002395       AMD1        NaN        NaN    NaN\n",
       "101   C0002395       AQP4        NaN        NaN    NaN\n",
       "405   C0002395       GPX1        NaN        NaN    NaN\n",
       "703   C0002395       MDH1        NaN        NaN    NaN\n",
       "747   C0002395        MPZ        NaN        NaN    NaN\n",
       "979   C0002395      OPRK1        NaN        NaN    NaN\n",
       "1110  C0002395      OPRL1        NaN        NaN    NaN\n",
       "1181  C0002395     MAPK10        NaN        NaN    NaN\n",
       "1263  C0002395        VDR        NaN        NaN    NaN\n",
       "4175  C0002395     CCL4L1        NaN        NaN    NaN"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display nan_df in full\n",
    "nan_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66367e21-261c-4eeb-85ce-0fef3b2062ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the name recorded for one disease ID in the currently loaded merged_df.\n",
    "desired_disease_id = 'C0002395'\n",
    "\n",
    "# merged_df is bound to frames with two schemas in this notebook\n",
    "# (Disease_ID/Disease and diseaseId/diseaseDes), so pick the columns that exist\n",
    "# rather than hard-coding one schema and hitting a KeyError on the other.\n",
    "# NOTE(review): treating diseaseDes as the counterpart of Disease is an assumption - confirm.\n",
    "id_col = 'Disease_ID' if 'Disease_ID' in merged_df.columns else 'diseaseId'\n",
    "name_col = 'Disease' if 'Disease' in merged_df.columns else 'diseaseDes'\n",
    "disease_row = merged_df[merged_df[id_col] == desired_disease_id]\n",
    "\n",
    "# Fail with a clear message instead of an opaque IndexError from .values[0].\n",
    "if disease_row.empty:\n",
    "    raise LookupError(f\"No rows found in merged_df for disease ID {desired_disease_id}\")\n",
    "\n",
    "# Extract the disease name; when several rows match, the first row's value is used.\n",
    "disease_name = disease_row[name_col].values[0]\n",
    "\n",
    "print(f\"The disease name for Disease_ID {desired_disease_id} is {disease_name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "f918c14f-bb01-4359-bebe-5666270eccc3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The disease name for Disease_ID C0009375 is Colonic Neoplasms: A benign or malignant neoplasm that affects the colon. Representative examples of benign neoplasms include lipoma and leiomyoma. Representative examples of malignant neoplasms include carcinoma, lymphoma, and sarcoma. Colonic adenomas always exhibit epithelial dysplasia and are considered premalignant neoplasms.\n"
     ]
    }
   ],
   "source": [
    "# Look up the name recorded for one disease ID in the currently loaded merged_df.\n",
    "desired_disease_id = 'C0009375'\n",
    "\n",
    "# merged_df is bound to frames with two schemas in this notebook\n",
    "# (Disease_ID/Disease and diseaseId/diseaseDes), so pick the columns that exist\n",
    "# rather than hard-coding one schema and hitting a KeyError on the other.\n",
    "# NOTE(review): treating diseaseDes as the counterpart of Disease is an assumption - confirm.\n",
    "id_col = 'Disease_ID' if 'Disease_ID' in merged_df.columns else 'diseaseId'\n",
    "name_col = 'Disease' if 'Disease' in merged_df.columns else 'diseaseDes'\n",
    "disease_row = merged_df[merged_df[id_col] == desired_disease_id]\n",
    "\n",
    "# Fail with a clear message instead of an opaque IndexError from .values[0].\n",
    "if disease_row.empty:\n",
    "    raise LookupError(f\"No rows found in merged_df for disease ID {desired_disease_id}\")\n",
    "\n",
    "# Extract the disease name; when several rows match, the first row's value is used.\n",
    "disease_name = disease_row[name_col].values[0]\n",
    "\n",
    "print(f\"The disease name for Disease_ID {desired_disease_id} is {disease_name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "3045f419-f72b-41cf-9e54-547a019c401b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "135"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows selected into disease_row\n",
    "len(disease_row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2367342-f169-460f-a419-6cd837031b25",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 276,
   "id": "d6ce3b69-ea24-491c-87c1-f263643a7ba7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gene_ID</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Disease_ID</th>\n",
       "      <th>Disease</th>\n",
       "      <th>Y</th>\n",
       "      <th>diseaseClass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6015</th>\n",
       "      <td>18</td>\n",
       "      <td>MASMLLAQRLACSFQHSYRLLVPGSRHISQAAAKVDVEFDYDGPLM...</td>\n",
       "      <td>C0557874</td>\n",
       "      <td>Global developmental delay: A delay in the ach...</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6016</th>\n",
       "      <td>86</td>\n",
       "      <td>MSGGVYGGDEVGALVFDIGSYTVRAGYAGEDCPKVDFPTAIGMVVE...</td>\n",
       "      <td>C0557874</td>\n",
       "      <td>Global developmental delay: A delay in the ach...</td>\n",
       "      <td>0.3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6017</th>\n",
       "      <td>162</td>\n",
       "      <td>MTDSKYFTTTKKGEIFELKAELNSDKKEKKKEAVKKVIASMTVGKD...</td>\n",
       "      <td>C0557874</td>\n",
       "      <td>Global developmental delay: A delay in the ach...</td>\n",
       "      <td>0.3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6018</th>\n",
       "      <td>318</td>\n",
       "      <td>MALRACGLIIFRRCLIPKVDNNAIEFLLLQASDGIHHWTPPKGHVE...</td>\n",
       "      <td>C0557874</td>\n",
       "      <td>Global developmental delay: A delay in the ach...</td>\n",
       "      <td>0.3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6019</th>\n",
       "      <td>652</td>\n",
       "      <td>MIPGNRMLMVVLLCQVLLGGASHASLIPETGKKKVAEIQGHAGGRR...</td>\n",
       "      <td>C0557874</td>\n",
       "      <td>Global developmental delay: A delay in the ach...</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Gene_ID                                               Gene Disease_ID  \\\n",
       "6015       18  MASMLLAQRLACSFQHSYRLLVPGSRHISQAAAKVDVEFDYDGPLM...   C0557874   \n",
       "6016       86  MSGGVYGGDEVGALVFDIGSYTVRAGYAGEDCPKVDFPTAIGMVVE...   C0557874   \n",
       "6017      162  MTDSKYFTTTKKGEIFELKAELNSDKKEKKKEAVKKVIASMTVGKD...   C0557874   \n",
       "6018      318  MALRACGLIIFRRCLIPKVDNNAIEFLLLQASDGIHHWTPPKGHVE...   C0557874   \n",
       "6019      652  MIPGNRMLMVVLLCQVLLGGASHASLIPETGKKKVAEIQGHAGGRR...   C0557874   \n",
       "\n",
       "                                                Disease    Y diseaseClass  \n",
       "6015  Global developmental delay: A delay in the ach...  0.4          NaN  \n",
       "6016  Global developmental delay: A delay in the ach...  0.3          NaN  \n",
       "6017  Global developmental delay: A delay in the ach...  0.3          NaN  \n",
       "6018  Global developmental delay: A delay in the ach...  0.3          NaN  \n",
       "6019  Global developmental delay: A delay in the ach...  0.4          NaN  "
      ]
     },
     "execution_count": 276,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of nan_df\n",
    "nan_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4d285c22-d3cc-4e06-8173-f08f10640274",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the rows whose 'diseaseClass' string contains both a 'C' and an 'F';\n",
    "# missing classes count as non-matching (na=False).\n",
    "# NOTE(review): needs a merged_df that has a 'diseaseClass' column - confirm which frame is loaded.\n",
    "CF_df = merged_df.loc[\n",
    "    lambda frame: frame['diseaseClass'].str.contains('C', na=False)\n",
    "    & frame['diseaseClass'].str.contains('F', na=False)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d3ed7b32-a378-4f61-852d-eab4b57b9580",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4EAAAKuCAYAAAAfJxmbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABvI0lEQVR4nO3deZgsZX328e8tO4IgiogsogguoCIgaqKGuERxQ31diEbBNcYVNSpqEtBXfBE1cYtGohE0KuKOuO+aKLvIKuIGIiCICii4IL/3j6qBPn165sw5zunqmvp+rutcp+up7p57nu6q6V/XU0+lqpAkSZIkDcONug4gSZIkSZoei0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASYOQ5D+S/PMSPdf2SX6TZJ12+WtJnr4Uz90+32eT7L9Uz7caP/c1SX6R5JJF3r+S3K69vWT9OwSr8xqvrfdDkr2TXLiEz3dkktcs1fOtTUkOSfLfS/Rcf5PkE0vxXGtTkrsk+VbXOSTNBotASb2X5CdJrklyVZJfJ/lWkmcluX4fV1XPqqr/u8jnesBC96mqC6pqk6r60xJkX+nDaFXtU1VH/bnPvZo5tgNeDNypqm65uo9fbP8uV6MF8WKszmvcxfthVZIckOR//sznWL99/5+X5LfttvdfSXZYophrmmvvJNe1X/TM/fvUAg95LXDYyONX672wmtn2T3JKkiuTXJjk8CTrjqzfIsnH2/48P8kT5tZV1enAr5M8fG1kk9QvFoGSlouHV9WmwK1pPpC9DHj3Uv+Q0Q9cy8ytgcur6tKug2gwPgI8AngCsBlwV+AU4P5dhmpd1H7RM/dvYuGU5O7AZlV1/JRybQwcCNwcuAdNX/3jyPp/B/4AbAU8EXhHkl1G1r8f+PupJJU00ywCJS0rVXVFVR0LPB7YP8musOJQtSQ3T3Jce9Twl0m+meRGSd4HbA98qv32/6VJdmi/2X9akguAr4y0jRaEOyY5MckVST6ZZIv2Z6005G7uaGOSBwOvAB7f/rzvtuuvH17a5vqn9lv9S5O8N8lm7bq5HPsnuaAdyvnK+fomyWbt4y9rn++f2ud/APBF4FZtjiPnefxLklyc5KIkTx1bt8r+bdfdKslH2ww/TvL8kefYK8m328ddnORtSdZv1yXJv7V9cEWS00de2w2SvKHtg5+nGZq60QL98Iwk56Q5cnx2kt3b9ju2ff/rJGclecTY7/fvST7dPu6EJDu2677R3u27bf89PslN2z64LMmv2tvbjjzf6Gt8QJL/aX+HX7X9ss8a3vc2Sb7RZvxSm3nBYY9JXtG+d36S5Ikj7fO9X+4I/Adwr/b3/fXI0910Uh9N+JkPAB4I7FtVJ1XVte22++9VNfHLmyQHJfnhyOv2qJF1i+mXr7eP/SJNEbUU9gG+PvJzVnovtO3PSPKDdns4NsmtRh5TSZ6f5Eft6/D6jIxiGFVV76iqb1bVH6rqZzRF3V+2z3Nj4P8A/1xVv6mq/wGOBZ408hRfA+6fZIMl+v0l9ZRFoKRlqapOBC4E7jNh9YvbdVvSfGP+iuYh9STgApqjiptU1eEjj/kr4I7Ag+b5kU8GngrcCrgWeMsiMn6OZijZh9qfd9cJdzug/ffXwG2BTYC3jd3n3sDtaY4K/Ev7IX2St9Iccblt+/s8GXhKVX2J5sPs3NGPA8YfmKZg/UeaD+47AQsNmZ3Yv+0H208B3wW2afMemGSuT/8EvJDmA/q92vXPbtf9DXBfYGdgc5oi//J23eva9t2A27XP/S+TgiV5LHBI+7vfhOZI1OVJ1muzfQG4BfA84P1Jbj/y8L8FXgXcFPgBcChAVd23XX/Xtv8+RPP39T00R1i3B65h5ddt1D2Ac9vf/XDg3UmyBvf9AHAicLP293zSpCcYccv2ebYB9geOGPmd53u/nAM8C/h2+/tuPvJ8E/toggcAJ1bVT1eR
b9QPabbnzdqf8d9Jth5Zv6p+OaVd93/b33Up3Ln9mcDk90KS+wH/D3gcsDVwPnD02PM8CtgT2B3Yl2Zfshj3Bc5qb+8M/Kmqvj+y/rvA9UcC28LxjzT7C0kDZhEoaTm7CNhiQvsfaT6M3bqq/th+s16reK5Dquq3VXXNPOvfV1VnVtVvgX8GHpd24pg/0xOBf62qH1XVb4CXA/tlxaOQr6qqa6rquzQf+lYqJtssjwdeXlVXVdVPgDey6iJhzuOA94z8jocscN/5+vfuwJZV9er2SMaPgP8E9gOoqlOq6vj2qNBPgHfSFB9zz7kpcAcgVXVOVV3cfsh/BvDCqvplVV1FU1jvN0+2pwOHt0efqqp+UFXnA/ekKbAPa7N9BTiOpqiZ87GqOrGqrqU5ArPbfB1QVZdX1Uer6uo206Ejv8sk51fVf7bnmR7V9t9Wq3PfJNvT9PG/tL/D3JGgVfnnqvp9VX0d+DQ3vHfX5P2y2D66GXDxIrJdr6o+XFUXVdV1baF9HrDXyF1W1S9zv+c3aAr+hdyqPSI89+9x89xvc+CqVTzXE4H/qqpTq+r3NNvwvbLiuY+va9+/FwBvYsX33URJnkJTOL6hbdoEuGLsblfQbDejrmpzSxowi0BJy9k2wC8ntL+e5ijFF9ohWAct4rlWdcRidP35wHoszZCzW7XPN/rc67JigTA6m+fVNB8Gx90cWH/Cc22zGjnGf8f5zNe/t2bswzXNUcKtAJLs3A6bvCTJlTTF3M0B2qLsbTTnPP08yRFJbkJztHFj4JSR5/xc2z7JdjRHlCb+flV13djvONo/i+ln2t9l4yTvbIdRXgl8A9h8gS8Grn/uqrq6vTnf889331sBvxxpg1W/b3/VFvVzzm+fZ03fL4vto8tpirRFS/LkJKeNvM67suI2tlC/TPo9F3JRVW0+8u+Yee73K1YussatsA23X+Zczop9Ob5t3YoFJHkkzbnP+1TVL9rm39Ac3R51E1YuUjcFfr2KzJKWOYtASctSmgkbtgFWmsGwPbLx4qq6LfBw4EVJ5iajmO+I4KqOFG43cnt7miNXvwB+S1OkzOVahxULlFU970U0xdPoc18L/HwVjxv3izbT+HP9bJGPv5iVf8eJFujfnwI/HvtwvWlVPaR96DuA7wE7VdVNaArEjDzvW6pqD5rhbTsDL2l/r2uAXUaec7Oqmq/4+Ckw6Ty1i4Dtxs7FWp3+GfdimiF392h/l7lhgvMN8VwKFwNbJNl4pG27+e7cuml7Ltmc7Wn6YlXvl1W9b1flS8BeGTlPciFJbk1z1Pi5wM3aIahnsrj+vJjJv+dSOJ3mvbiQFbbhNsfNWPG9Nb5tXTTfk7VDs/+TZtj6GSOrvg+sm2Snkba7csNwUdpzEddnZAirpGGyCJS0rCS5SZKH0Zxz899jH5Lm7vOwJLdrhxJeSXMu2tzlHn5Ocw7U6vq7JHdqP4C/GvhIOyzt+8CGSR7annf2T8DopAw/B3aYbyII4IPAC9uJLTbhhnMIr12dcG2WY4BDk2zafqh+EbDYa6UdAxww8jsePN8dF+jfE4Erk7wsyUZJ1kmya1uwQ3OE4krgN0nuAPzDyHPePck92j78LfA7mvOfrqP5QPxvSW7R3nebkfMMx70L+Mcke6Rxu7YvTmif96VJ1kuyN00BO37u1nzG3zeb0hSnv04zSdC8/bVU2mGtJwOHpLn8wr1ofodVeVV7//sADwM+vIj3y8+BbdNO3LMGWb9EMxnRx9vXYt325zwrY5MOtW5MU3heBtcPhdx1kT9rrl/mfs97s7h+WYzPsPIw3/H3wgeApyTZLc2ELK8FTmiH2M55SZrJhLYDXgB8aNIPa88vfD/wf6o57/l67ZHOjwGvTnLjJH9Jc37h+0butjfwlXZYqqQBswiUtFx8KslVNEd6Xgn8K/CUee67E82RiN8A3wbeXlVfa9f9P+Cf2iFn/zjP4yd5H3AkzZC0DYHnQzNbKc3kJu+i+eb/tzST
psz5cPv/5UlOnfC8/9U+9zeAH9MUP89bjVyjntf+/B/RHCH9QPv8q1RVn6U5V+krNEM9v7LA3Sf2b1tYPJzmPLEf0xxtehfNRB/QTDzzBJrha//Jih+Eb9K2/YpmuNzl3HAu1MvaTMe3Qy+/xDwTX1TVh2nOz/tA+3M+AWxRVX+gmSRmnzbX24EnV9X3FuqXEYcAR42cP/YmYKP2uY6nGaI6DU+kmVTncuA1NH240Af+S2j69CKa4uJZI7/zQu+Xr9AcYbokyS9YM4+hKaI+RHPu2pk057h9afyOVXU2zTmJ36Ypsu4M/O9q/Kwn0Ewc80uagvy9a5h5PNepwBVJ7jHSfAgj74Wq+jLNecIfpTkquSMrn7P6SZqJa06jOS9zvsvb/DPN9vKZ3HANw8+OrH82zfvuUpovkP6hqs4aWf9EmpldJQ1capVzIUiSpD5K8iHge1W11o9EDlWSvwGeXVWPXMPHF80Q6B8sabCVf86dgSOq6l5r8+dI6geLQEmSlol2aO0vaY60/g3Nkc57VdV3usyl+U2rCJSkUeuu+i6SJKknbklzXtjNaIYd/4MFoCRpnEcCJUmSJGlAnBhGkiRJkgbEIlCSJEmSBmTZnhN485vfvHbYYYeuY0iSJElSJ0455ZRfVNWW4+3LtgjcYYcdOPnkk7uOIUmSJEmdSHL+pHaHg0qSJEnSgKy1IjDJfyW5NMmZI22vT/K9JKcn+XiSzUfWvTzJD5Kcm+RBI+17JDmjXfeWJFlbmSVJkiRpuVubRwKPBB481vZFYNequgvwfeDlAEnuBOwH7NI+5u1J1mkf8w7gmcBO7b/x55QkSZIkLdJaKwKr6hvAL8favlBV17aLxwPbtrf3BY6uqt9X1Y+BHwB7JdkauElVfbuaCxq+F3jk2sosSZIkSctdl+cEPhX4bHt7G+CnI+subNu2aW+Pt0+U5JlJTk5y8mWXXbbEcSVJkiSp/zopApO8ErgWeP9c04S71QLtE1XVEVW1Z1XtueWWK82EKkmSJEmDN/VLRCTZH3gYcP92iCc0R/i2G7nbtsBFbfu2E9olSZIkSWtgqkcCkzwYeBnwiKq6emTVscB+STZIchuaCWBOrKqLgauS3LOdFfTJwCenmVmSJEmSlpO1diQwyQeBvYGbJ7kQOJhmNtANgC+2V3o4vqqeVVVnJTkGOJtmmOhzqupP7VP9A81MoxvRnEP4WSRJkiRJayQ3jMhcXvbcc886+eSTu44hSZIkSZ1IckpV7Tne3uXsoJIkSZKkKbMIlCRJkqQBsQiUJEmSpAGxCJQkSZKkAbEIlCRJkqQBsQiUJEmSpAGxCJQkSZKkAbEIlCRJkqQBWbfrAF3b4aBPL/lz/uSwhy75c0qSJEnSUvBIoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA3IWisCk/xXkkuTnDnStkWSLyY5r/3/piPrXp7kB0nOTfKgkfY9kpzR
rntLkqytzJIkSZK03K3NI4FHAg8eazsI+HJV7QR8uV0myZ2A/YBd2se8Pck67WPeATwT2Kn9N/6ckiRJkqRFWmtFYFV9A/jlWPO+wFHt7aOAR460H11Vv6+qHwM/APZKsjVwk6r6dlUV8N6Rx0iSJEmSVtO0zwncqqouBmj/v0Xbvg3w05H7Xdi2bdPeHm+XJEmSJK2BWZkYZtJ5frVA++QnSZ6Z5OQkJ1922WVLFk6SJEmSlotpF4E/b4d40v5/adt+IbDdyP22BS5q27ed0D5RVR1RVXtW1Z5bbrnlkgaXJEmSpOVg2kXgscD+7e39gU+OtO+XZIMkt6GZAObEdsjoVUnu2c4K+uSRx0iSJEmSVtO6a+uJk3wQ2Bu4eZILgYOBw4BjkjwNuAB4LEBVnZXkGOBs4FrgOVX1p/ap/oFmptGNgM+2/yRJkiRJa2CtFYFV9bfzrLr/PPc/FDh0QvvJwK5LGE2SJEmSBmtWJoaRJEmSJE2BRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNSCdFYJIXJjkryZlJPphkwyRbJPlikvPa/286cv+XJ/lBknOTPKiLzJIkSZK0HEy9CEyyDfB8YM+q2hVYB9gPOAj4clXtBHy5XSbJndr1uwAPBt6eZJ1p55YkSZKk5aCr4aDrAhslWRfYGLgI2Bc4ql1/FPDI9va+wNFV9fuq+jHwA2Cv6caVJEmSpOVh6kVgVf0MeANwAXAxcEVVfQHYqqoubu9zMXCL9iHbAD8deYoL27aVJHlmkpOTnHzZZZetrV9BkiRJknqri+GgN6U5uncb4FbAjZP83UIPmdBWk+5YVUdU1Z5VteeWW27554eVJEmSpGWmi+GgDwB+XFWXVdUfgY8BfwH8PMnWAO3/l7b3vxDYbuTx29IMH5UkSZIkraYuisALgHsm2ThJgPsD5wDHAvu399kf+GR7+1hgvyQbJLkNsBNw4pQzS5IkSdKysO60f2BVnZDkI8CpwLXAd4AjgE2AY5I8jaZQfGx7/7OSHAOc3d7/OVX1p2nnliRJkqTlYOpFIEBVHQwcPNb8e5qjgpPufyhw6NrOJUmSJEnLXVeXiJAkSZIkdcAiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQ
kiRJkgbEIlCSJEmSBsQiUJIkSZIGxCJQkiRJkgZk3a4DaHF2OOjTS/6cPznsoUv+nJIkSZJmm0cCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQBZVBCb5y8W0SZIkSZJm22KPBL51kW2SJEmSpBm27kIrk9wL+AtgyyQvGll1E2CdtRlMkiRJkrT0FiwCgfWBTdr7bTrSfiXwmLUVSpIkSZK0dixYBFbV14GvJzmyqs6fUiZJkiRJ0lqyqiOBczZIcgSww+hjqup+ayOUJEmSJGntWGwR+GHgP4B3AX9ae3EkSZIkSWvTYovAa6vqHWs1iSRJkiRprVvsJSI+leTZSbZOssXcv7WaTJIkSZK05BZ7JHD/9v+XjLQVcNuljSNJkiRJWpsWVQRW1W3WdhBJkiRJ0tq3qCIwyZMntVfVe5c2jiRJkiRpbVrscNC7j9zeELg/cCpgEShJkiRJPbLY4aDPG11OshnwvjX9oUk2p7ncxK405xY+FTgX+BDNtQh/Ajyuqn7V3v/lwNNoLk/x/Kr6/Jr+bEmSJEkassXODjruamCnP+Pnvhn4XFXdAbgrcA5wEPDlqtoJ+HK7TJI7AfsBuwAPBt6eZJ0/42dLkiRJ0mAt9pzAT9EcsQNYB7gjcMya/MAkNwHuCxwAUFV/AP6QZF9g7/ZuRwFfA14G7AscXVW/B36c5AfAXsC31+TnS5IkSdKQLfacwDeM3L4WOL+qLlzDn3lb4DLgPUnuCpwCvADYqqouBqiqi5Pcor3/NsDxI4+/sG1bSZJnAs8E2H777dcwniRJkiQtX4saDlpVXwe+B2wK3BT4w5/xM9cFdgfeUVV3A35LO/RzHpkUaZ6cR1TVnlW155ZbbvlnRJQkSZKk5WlRRWCSxwEnAo8FHgeckOQxa/gzLwQurKoT2uWP0BSFP0+ydfvztgYuHbn/diOP3xa4aA1/tiRJkiQN2mInhnklcPeq2r+qnkxzTt4/r8kPrKpLgJ8muX3bdH/gbOBYYP+2bX/gk+3tY4H9kmyQ5DY0E9KcuCY/W5IkSZKGbrHnBN6oqi4dWb6cNZ9ZFOB5wPuTrA/8CHhK+3zHJHkacAHNUUeq6qwkx9AUitcCz6mqP/0ZP1uSJEmSBmuxReDnknwe+GC7/HjgM2v6Q6vqNGDPCavuP8/9DwUOXdOfJ0mSJElqLFgEJrkdzaydL0nyaODeNBO1fBt4/xTySZIkSZKW0KqGdL4JuAqgqj5WVS+qqhfSHAV809qNJkmSJElaaqsqAneoqtPHG6vqZGCHtZJIkiRJkrTWrKoI3HCBdRstZRBJkiRJ0tq3qiLwpCTPGG9sZ/A8Ze1EkiRJkiStLauaHfRA4ONJnsgNRd+ewPrAo9ZiLkmSJEnSWrBgEVhVPwf+IslfA7u2zZ+uqq+s9WSSJEmSpCW3qOsEVtVXga+u5SySJEmSpLVsVecESpIkSZKWEYtASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEka
EItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGhCLQEmSJEkaEItASZIkSRoQi0BJkiRJGpDOisAk6yT5TpLj2uUtknwxyXnt/zcdue/Lk/wgyblJHtRVZkmSJEnquy6PBL4AOGdk+SDgy1W1E/DldpkkdwL2A3YBHgy8Pck6U84qSZIkSctCJ0Vgkm2BhwLvGmneFziqvX0U8MiR9qOr6vdV9WPgB8BeU4oqSZIkSctKV0cC3wS8FLhupG2rqroYoP3/Fm37NsBPR+53YdsmSZIkSVpNUy8CkzwMuLSqTlnsQya01TzP/cwkJyc5+bLLLlvjjJIkSZK0XHVxJPAvgUck+QlwNHC/JP8N/DzJ1gDt/5e2978Q2G7k8dsCF0164qo6oqr2rKo9t9xyy7WVX5IkSZJ6a+pFYFW9vKq2raodaCZ8+UpV/R1wLLB/e7f9gU+2t48F9kuyQZLbADsBJ045tiRJkiQtC+t2HWDEYcAxSZ4GXAA8FqCqzkpyDHA2cC3wnKr6U3cxJUmSJKm/Oi0Cq+prwNfa25cD95/nfocCh04tmCRJkiQtU11eJ1CSJEmSNGUWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQgFoGSJEmSNCAWgZIkSZI0IBaBkiRJkjQg63YdQMvLDgd9esmf8yeHPXTJn1OSJEkaKo8ESpIkSdKAWARKkiRJ0oBYBEqSJEnSgFgESpIkSdKAWARKkiRJ0oBYBEqSJEnSgFgESpIkSdKAWARKkiRJ0oBYBEqSJEnSgFgESpIkSdKAWARKkiRJ0oBYBEqSJEnSgFgESpIkSdKAWARKkiRJ0oBYBEqSJEnSgFgESpIkSdKAWARKkiRJ0oBYBEqSJEnSgFgESpIkSdKAWARKkiRJ0oBMvQhMsl2SryY5J8lZSV7Qtm+R5ItJzmv/v+nIY16e5AdJzk3yoGlnliRJkqTloosjgdcCL66qOwL3BJ6T5E7AQcCXq2on4MvtMu26/YBdgAcDb0+yTge5JUmSJKn3pl4EVtXFVXVqe/sq4BxgG2Bf4Kj2bkcBj2xv7wscXVW/r6ofAz8A9ppqaEmSJElaJjo9JzDJDsDdgBOArarqYmgKReAW7d22AX468rAL27ZJz/fMJCcnOfmyyy5ba7klSZIkqa86KwKTbAJ8FDiwqq5c6K4T2mrSHavqiKras6r23HLLLZcipiRJkiQtK50UgUnWoykA319VH2ubf55k63b91sClbfuFwHYjD98WuGhaWSVJkiRpOelidtAA7wbOqap/HVl1LLB/e3t/4JMj7fsl2SDJbYCdgBOnlVeSJEmSlpN1O/iZfwk8CTgjyWlt2yuAw4BjkjwNuAB4LEBVnZXkGOBsmplFn1NVf5p6akmSJElaBqZeBFbV/zD5PD+A+8/zmEOBQ9daKEmSJEkaiE5nB5UkSZIkTZdFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWg
JEmSJA2IRaAkSZIkDYhFoCRJkiQNiEWgJEmSJA3Iul0HkLqww0GfXvLn/MlhD13y55QkSZKWmkcCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQNbtOoCk+e1w0KeX9Pl+cthDl/T5JEmS1D8eCZQkSZKkAbEIlCRJkqQBsQiUJEmSpAGxCJQkSZKkAbEIlCRJkqQBsQiUJEmSpAGxCJQkSZKkAbEIlCRJkqQBsQiUJEmSpAGxCJQkSZKkAbEIlCRJkqQBWbfrAJL6bYeDPr3kz/mTwx665M/Zl5ySJElrm0cCJUmSJGlALAIlSZIkaUAcDipJM8Rhq5IkaW3zSKAkSZIkDYhFoCRJkiQNiMNBJUmrzWGrkiT1l0cCJUmSJGlAPBIoSVq2+nLEcqg5PforSd2wCJQkSctGXwpqSepSb4rAJA8G3gysA7yrqg7rOJIkSdIa6Uuxak5peepFEZhkHeDfgQcCFwInJTm2qs7uNpkkSZK0OH0pVvuQsw8ZZ1kvikBgL+AHVfUjgCRHA/sCFoGSJEmSZtKsFqupqiWIsnYleQzw4Kp6erv8JOAeVfXcsfs9E3hmu3h74NwljnJz4BdL/JxLrQ8ZwZxLzZxLy5xLpw8ZwZxLzZxLy5xLpw8ZwZxLbcg5b11VW4439uVIYCa0rVS9VtURwBFrLURyclXtubaefyn0ISOYc6mZc2mZc+n0ISOYc6mZc2mZc+n0ISOYc6mZc2V9uU7ghcB2I8vbAhd1lEWSJEmSeqsvReBJwE5JbpNkfWA/4NiOM0mSJElS7/RiOGhVXZvkucDnaS4R8V9VdVYHUdbaUNMl1IeMYM6lZs6lZc6l04eMYM6lZs6lZc6l04eMYM6lZs4xvZgYRpIkSZK0NPoyHFSSJEmStAQsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAueRZK8kd29v3ynJi5I8pOtci5Vkk64zjFoG/fnArjOMSnLLJLdsb2+Z5NFJduk616g+ZIT+5NQw9WHf2ddtKMlru86wHCV5StcZRvVhGwJIcqMkN2pvr59k9yRbdJ2rj/rymnfN2UEnSHIwsA/NJTS+CNwD+BrwAODzVXVod+kWJ8kFVbV91znA/lxqSf4eOAgI8DrgAOAs4C+Bw6vq3d2la/QhI/Qn50KSfLaq9uk6x6okOaKqntl1jlVJckZV3bnrHNCPfWdftqEkbxlvAp4EvBegqp4/9VCrqUfb+iz9vZz5bQggySOBdwLXAc8CXgH8FtgZ+Ieq+lR36RZnVvbxfXnNFzKtvrQInCDJGcBuwAbAJcC2VXVlko2AE6rqLl3mm5PkRfOtAl5ZVTPxDVKP+vPY+VYB96uqG08zz3za/rwHsBFwPnC7qrokyU2Br1bVbl3mg35khF7l3H2+VcBxVbX1NPPMZ4FvrQN8t6q2nWae+SR59HyrgP+oqi2nmWc+fdh39mgbupDmg+AXaF5ngDcA/whQVUd1k2xFPdrWT59vFbBzVW0wzTzz6cM2BJDkOzSFy0bAd4G7V9W5SW4NfLSq9uw0YKsP+/gevead92UvLhbfgWur6k/A1Ul+WFVXAlTVNUmu6zjbqNcCrweunbBulob69qU/7wP8HfCbsfYAe00/zrz+WFVXc0N/XgJQVb9KMivf6vQhI/Qn50nA17nhw+uozacbZUGX0RQCozmrXb5FJ4km+xDwfpps4zaccpaF9GHf2Zdt6I7A/wUeDLykqn6W5OBZKf5G9GVb3wp4
EPCrsfYA35p+nHn1YRsCYG7baY+kntu2nT83RHRG9GEf35fXvPO+tAic7A9JNm7/sO0x15hkM5pD9bPiVOATVXXK+IokT+8gz3z60p/HA1dX1dfHVyQ5t4M887kuyXpV9UfgoXONSTZkdor/PmSE/uQ8B/j7qjpvfEWSn3aQZz4/Au5fVReMr5ixnKcDb6iqM8dXJHlAB3nm04d9Zy+2oaq6CjgwyR7Afyf5NDOUb0RftvXjgE2q6rTxFUm+NvU08+vDNgQ05wRW1XXAU0fa1gHW7y7VSvqwj+/La955XzocdIIkG1TV7ye03xzYuqrO6CDWSpLcHvhlVV02Yd1WVfXzDmKtpC/92RdJtgcubj90jbZvA9yxqr7UTbIVssx8RuhVzscAZ8x9Ozy27pFV9Ynpp1pZkucA/1NV352w7nlV9dYOYq0kyX2A8+f547tnVZ3cQayV9GHf2ZdtaFSSAM8G7lVVf9d1nlF92db7og/bEECaSUzOqKrfjbXvANy7qv67k2Bj+rCP79Fr3nlfWgQuIMlWwDY0h2cvmpWiqq/61J/tWO2qqvGhLjOlDzn7kBH6k1PD05d9p9vQ8PTovdmLnFo6vuarZhE4QZLdgP8ANgN+1jZvC/waeHZVndpNshUlWRd4GvAo4Fa0b3Tgk8C7x7+Z7UqP+nN74HDg/jTZAtwE+ApwUFX9pLNwI/qQsw8ZoT85AZLcAdiXkT9qwLFVdU6nwca0Q24ezIo5P19Vv+4y17gkDwIeyYo5P1lVn+sy16g+7Dv7sg0l2Y7mHPptgM8Cr5/7G5nkE1X1yA7jraAP23qSuwHvYIbfm9CPbQiu32++nGafNDcx1aU0n+cOm6X956zv4/vymkP3fWkROEGS02jG5J8w1n5P4J1VdddOgo1J8kGaN/VRwIVt87bA/sAWVfX4jqKtoEf9+W3gTcBH2pOK58bjPxY4sKru2WG86/UhZx8yQq9yvgz4W+BoVtzW9wOOrqrDuso2KsmTgYNpZmAc/eP7QOBVVfXerrKNSvImmqnX38uK/flk4LyqekFH0VbQh31nj7ahLwIfpTn3+2k05wo9vKouT/KdqrpbpwFbPdrWT2PG35vQq5yfp/ni5Ki6YYKYW9J8nntAVc3EtYr7sI/v0WveeV9aBE6Q5Lyq2mmedT+oqttNO9MkSc6tqtvPs+77VbXztDNN0qP+XCjnvOumrQ85+5ARepXz+8AuE867Wh84a4ZyngvcY/xbzDSXCzhhhvZJE/eP7bli35+h/pz5fWePtqHTauRyFUn+jubIyyOAD1fVfJdmmKoebesz/96EXuVc6PPcvOumrQ/7+D695nTcl84OOtln08wc9l5gboae7Wi+JZ6ZoULAr5I8luYaMtcBpJlK+LGsPG1zl/rSn6ckeTvNkdXRnPsD3+ks1cr6kLMPGaE/Oa+jGfJ9/lj71szWbGdh8mUXrmvXzYrfJdmrqk4ca7878LtJD+hIH/adfdmG1kuy4dzEG1X130kuAT4PzMQ1YFt92db78N6E/uQ8P8lLaY4E/hyuP6ftAG7IPQv6sI/vy2veeV96JHAeSfbhhjH5oRmWcWxVfabTYCPSzBr1OuB+3FD0bQ58leZcjB93k2xlPenP9WmGCY3m/CnwKZpzLFeabaoLfcjZh4zQq5wPBt4GnMcNf9S2B24HPHdWzmNLsj/wLzTDW0ZzPhD4v1V1ZEfRVpDmMgFvBzblhiF32wFX0pwzstJld7oy6/vOHm1DLwROrbFLALXnth0+Q8PterGtw+y/N+f0IWd79Ocgmpxb0RQHPweOBV5XVb/sMN71erSP78Nr3nlfWgROkGTdqpp0AfaZleRmNK/nL7rOMq6P/SnNmvYo/16s+EftpLnzsGZF+2HmQayY8/M1gzNGtufcXJ9z7lycWeG+c5j6sK335b3Zl5x9Muv7+D695l335SxeKHUWXD9EKEnn1zyZT5LXjizuPosFYKsv/fmFkdsv7zLLQvqQsw8Z
oVc5H11V11XV8cBXquojVXX8LH0ohObE+6r6VVUdXVVvrKo3tLdn4sPBnCTPBWiLvt9V1cmzVgC2Zn7f2aNt6PAkz5rQ/sIkr+si0yR92dbpwXuz1YucSY4cub1/h1EW1JN9fF9e88770iJwstGxuH/ZWYpVe/DI7Zn5IzZBX/pzy5Hbj+0sxar1IWcfMkJ/cv7TyO0vd5Zi1d4+dyPNrJGz6qkjt9/XWYpV68O+sy/b0MOAIya0vxl46JSzLKQv23of3pvQn5yjM1bOxOzE8+jDPr4vr3nnfWkROJljZJdWX/rTnEunDxmhPzkzz+1ZM5ptw85SrJ5Z7s8+vD/7kBGai9ivNLFK2zZL74G+bOu9ed27DrBIfcnZh328fblIzg462R2SnE7zAu3Y3qZdrqq6S3fRVnCLJC+iyTV3+3pV9a/dxFpJX/rztkmOpck1d/t6VfWIbmKtpA85+5AR+pNzo3YCixsBG7a3r/8DUrNz8dsbtec43Gjk9mjOmZjcANg8yaNoct4kyaNHV1bVx7qJtZI+7Dv7sg1dnWSnqjpvtDHJTsA1HWWapC/beh/em9CfnNsmeQtNrrnb16uq53cTayV92Mf35TXvvC+dGGaCJLdeaH1VjU/d3IkkBy+0vqpeNa0sC+lRf/7VQuvHZ5XrSh9y9iEj9CrnVxdYXVV1v6mFWUCSnzD/9NZVVbedbqLJkrxngdVVVU9dYP3U9GHf2aNtaB/grcBrgLnZX/ekuVbggbMya2CPtvWZf29Cr3IueB5gVR01rSwL6cM+vkev+U/ouC8tAiVJ0rKXZFfgJcCubdOZwBuq6ozuUklSNzwncAFJjhhbPirJO9o/JDMjycPGlvdNco+u8synR/15yNjya5O8rL0Mx8zoQ84+ZIRe5bzl2PLWSTboKk/fJdl9bHnPJNt0lWc+fdh39mEbqqozq2r/qtqj/bf/rBaAfdnW+/DehF7lfObY8rOTPD6Jp2+tpr685l2yCFzYO8eW3wZ8CXhSB1kWcvex5XsA/5Tks12EWUBf+nP8QtEnAtcC/9ZBloX0IWcfMkJ/cr57bPl9wPeSvKGLMPNJcurY8jntv+d2lWke/zC2/DzguCQf6iLMAvqw7+zFNtSjD9m92Nbpx3sT+pNzfGhggHsDs3KeMtCbfXwvXvMu+9LhoJLUY0kC3Kmqzuo6y0LaI0L3rKpPd51lVZJsWlVXdZ1DSy/J31fVO0eWnwPcAbj1DE1iM1FftnUNU5/28bNuWn1pEThBkrtU1ent7fWAlwF70Zw/8JqqurrLfKPmhoxU1SVJtgTuA5w7S38k2m9YnwY8EtiGZvrei4BPAu+uqj92l25FSR7EhJxV9bkuc43rQ84+ZIT+5ByXZIsZmYmtl5LcCJpLBCRZn+Y8sZ/MUp+23wQfXVW/SHI74L+AuwDnAk+flaGMfd2G+mIWt/W+fE5Ksg7wdGBb4HNV9b8j6/6pql7TWbgxSe4A7MuK29GxVXVOp8F6JsnGwHNp+vCtwH7Ao4HvAa+uqt90GG+mWAROkOTUqtq9vf1G4GbAe2j+yN2sqp7cYbzrJfl74CCa4QKvAw4AzqK5OObhVTU+nKQTST4I/Bo4Criwbd4W2B/Yoqoe31G0FSR5E7Az8F5WzPlk4LyqmokLuPYhZx8yQq9y/iXwLpqZxJ5KM8PhjsB6wOOqaiYu2pvklzTDlj4IfKVm9A9MkkfSDBW6DngW8ArgtzTvhX+oqk91l+4GSc6qql3a258G3lVVH0+yN3BoVXV+IeS+bEMASfaimXXvpCR3Ah4MfG9WZgaFXm3rffmc9C5gY5rhyU8Cvl5VL2rXXf87dC3Jy4C/BY5mxe1oP5ovgg7rKtuotlD9N5r35/OBf6Z5zb8P7D8LBWuSY4CfAhsBtwfOAY4BHg7csqpmYjjoTPRlVflv7B/wnZHbpwHrtbcDnN51vpFsZ9Ds3G4G/IbmzQ1wU+C0rvON5Dx3gXXf7zrfqrK0r/t5Xefr
U84+ZOxZzhOBOwP3An4B3Ltt3x34367zjeQ8l+Yb2P8Ffga8mWZIS+fZxnJ+B7glcBvgSuD2bfutgZO7zjfanyO3TxpbNxN/i3q0DR0MHA+cDPw/4CvAvwDfAF7Zdb6RnH3Z1r8zcnuWPyedPnJ7XeAImi+qNhj9Hbr+R/PBf70J7evP2Hb0DZpi6m+B82mK1LRtX+46X5vxtPb/AJdwwwGvWXtvdt6XTgwz2WZJHpXk/wAbVDtcsZpXbZa+2f5jVV1dVZcDP6yqSwCq6lfMVs5fJXns3PAraIZiJXk88KsOc437XftN8bi7A7+bdpgF9CFnHzJCf3KuV1VnVHMU4LKq+h+4/sLRG3UbbQW/raq3VXOE6l40heDbk/woyWs7zraCqrqkqn4MXFBV57Zt5zNbE6Z9JMmRSW4LfDzJgUm2T/IU4IKuw7X6sg09hmaUzH2B5wCPrKpXAw8CZmI0Sqsv23pfPietP3ejqq6tqmfSFK1fATbpKtQE1wG3mtC+dbtuVmxaVZ+qqg/SfAY9uhqfojkAMTPa9+Jn2v9n8b3ZeV/O2mxYs+LrwNwJ4scn2aqqft6ef/eLDnONuy7Jeu3O96FzjUk2ZLY+yOxHM1z17Unmir7Nga+262bFAcA7kmzKDcMxtqM5UnBAR5kmOYDZz3kAs58R+pNzdHt++di69Zkd189sV1UXAIcDhye5PbO1rZPkRlU1N+Rurm0dZqg/q+qVSQ6gGV67I83Ri2cCnwCe2F2yFRxAP7aha6vqT8DVSX5YVVcCVNU1SWbpQ3ZftvW+fE46OcmDa+T81Kp6dZKLgHd0mGvcgcCXk5xHM5QRYHvgdjSjK2bFOiO3/3Vs3ay8P09OsklV/aaqRvfvOwKzNOFX533pOYE9lmR74OIam1glzXWu7lhVX+om2fzaGY9SVbP0R2IF7R+xbWg+0F44d4R11vQhZx8ywuznTPII4Es1NtlC+0ft/1TV4d0kW1GSf632fJtZluTuwBlV9bux9h1oht/9dyfBeqwH29AJwF9X1dUjXwCQZDPgqzU754b1YlvX0mtHS+3FyHZEMwz8T50GG9HORfH+GptcpZ246rlVdWAnwRYpSWpGCp9Z6EuLwHkkuQmwZVX9cKz9+hmxZkmSLWiOds/S8MpVSvLAqvpi1zlWJckdqup7XedYlT7k7ENG6E9ODVMf9p2ztA0l2aCqfj+h/ebA1jUjM60uB314b0Kvcm4yXihozfTlNZ8Wi8AJkjwOeBNwKc2MXAdU1UntulmaTWp7muFW96eZfTPATWjGuh9UVT/pLNwiJbmgqrbvOseqmHPp9CEjzFbO3HCZlUfRnDfiZVb+DO3Rn5fT5Nyybb6Upj8Pq6pfd5Ns8Wbp/TmfWcyYZCtG3ptV9fOOI62gT9v6fGbxdZ/EnGumD/v4+diXK/KcwMleAexRVRe3J7y/L8krqupjjJzzMgM+RFOsPnFuuEB7TstjaaYZvmd30W6Q5Nj5VtHMbDoTkrxlvlU05zDOhD7k7ENG6E9O4H00X/QcwsqXWflvZmRiiwUuF/D8JPvU7Fwu4BiaL8v2nhu22A5n3B/4MPDADrNdrw/7zr5sQ0l2A/4D2IxmwiKAbZP8Gnh2O/HKLOjLtj7z703oVc75htGHGZrApg/7+B695m+i4770SOAESc6sql1HlrcGjqO5zt0BM3Qk8Lyq2ml1101bOxnM39FcxmKFVcCHqmqr6adaWZKrgBcDKw0ZAt5YVTefcqSJ+pCzDxmhVznPrarbz7Pu+1W187QzTTJfliShuZTArOyTFurPeddNWx/2nT3ahk4D/r6qThhrvyfwzqq6ayfBxvRoW5/59yb0KufvgNcD105Y/cKq2ny6iSbrwz6+R695533pkcDJrkyy49z5gO0Rwb2BjwO7dBlszClJ3k5TnM7NJrUdzTeG3+ks1cqOB66uqq+Pr0hybgd55nMScGZVfWt8RZJDph9nXn3I2YeM0J+cv0ryWOCjIxNa
3IjmqP8snQf8uyR7VdWJY+2zdrmA85O8FDhqbjhgO0zwAG7Yl86CPuw7+7IN3Xi8AASoquOT3LiLQPPoy7beh/cm9CfnqcAnquqU8RVJnt5Bnvn0YR/fl9e88770SOAEaWaO27CqvjnWfl/gvlX1mm6SrSjJ+jTnDuzLDbNJ/RT4FM25A5O+mZ26JDsBt6iq/x1rvw/NORk/nPzI6Won17mmqq7pOstC+pCzDxmhVzl3oLnMyv244YPg5jSXWTmomuvddS7JHsDbgUmXC3j2pA84XUhyU+Agmn3nVjTnYvwcOBZ4XVX9ssN41+vDvrNH29BbaC6z8V5W/NL0ycCPq2ompuHvy7beF33YhgDSXEbn8powc3ray290EGslfdjH9+g177wvLQInSHIc8IrxWUCT7AkcXFUP7yZZP/WlP5NsSTMj7Nlj7bsAl1bVZd0kW1EfcvYhI/Qn56h4mZXB6MO+s0/bUJJ9WPFL0wuBY6vqM50Gm8csb+tpprHfqgcftGd+GwJIc33nTce3lyS3AK6sscvZdG2W9/F9ec3ndNmXs3RB8Vmyw/ibB6CqTgZ2mH6cyZJ8YeT2+EVlZ0kv+hN4KzfMFDhqW+DNU86ykD7k7ENG6EnOJK8dWdx9Fj8UAiR5UJLHVNUlVXVKVZ1cVZckeUKSmZhsBSDJkSO39+8wyqr0Yd/Zl21o3ar6bFU9q6oeXlUPa2/PVAHYl22dZlK6SRfevqZdNyv6sA0BvAW4z4T2BwL/NuUs80ryXIC2UPnd3D6+41jjevGaz0JfWgROtuEC6zaaWopVG/3D+9jOUqxaX/rzzpPGkFfV54G7dJBnPn3I2YeM0J+cDx65/brOUqzaq4CV+pNmJs5XTznLQkYnAel8NrsF9GHf2Zdt6PrzbpK8tcsgq9CXbb0XH7TpxzYEcO9qZqBfQVW9H7hvB3nm89SR2+/rLMXC+vKad96XFoGTnZTkGeONSZ4GdD7eeURfxvL2pT/XW8N109aHnH3ICP3J2RcbTxr+1367OUuTb7jvXDp92YZGL+/0l52lWD768kG7D9sQsODlx2b1s/osXTJtVF9e81Gd9KWzg052IPDxJE/khjfMnsD6NBdwnRW3TXM9lIzcvl5VPaKbWCs5kH7053lJHjI+PKg9j+RHHWWapA85+5AR+pPzFmmuI5WR29erqn/tJtZKNmyH3a0wzXmS9ZitD4bbthOFZOT29arq+d3EWsmBzP6+sy/bUF8K/75s6ycleUZV/edo4wx+0D6Q2d+GAC6dNFNkO1HhzJxXC2ye5FE0helNkjx6dOWko5kdOJB+vOad96UTwywgyV8Dc9cLPKuqvtJlnnFJ/mqh9ZOG6HSpB/25M831IL/FijuOewEPq6rvd5VtVB9y9iEj9CrnwQutr6pXTSvLQpIcRjPb5nOr6rdt241pznf5RVW9rMt8c1Z1HmBVHTWtLIsxy/vOHm1DVwM/oCmudmxv0y5XVc3E0NUebetb0Vw26w9M+KA9a+eJzfI2BJBkL+AY4EhW7M8nA/vVhMubdCHJexZYXVX11AXWT1UPXvPO+9IiUBqRZAPgCYzsOIAPzODMXDOfsw8ZoT85+yDJusBrgKcD57fN2wPvBv65qv7YVTatPX3YhpLceqH1VXX+Qus12ax/0O6TdibQ57DidvS2qrq0u1RaziwCl4Ekh1TVISPLrwWuAN5VVZd3FkzSkkrysKo6bmR5X+CSWfmWeE6SjYDbtYs/qBm9hlySZ1bVESPLzwYup7lQ97XzP1Jau/qyrWuYkuxeVaeOLO8JXFxVP+swVi912ZezerKpVs/4+PsTgWuZoWmF+yTJEWPLRyV5R5Jd53tMF/qQsw8ZoT85gbuPLd8D+Kckn+0izAI2q6oz2n/XJNm6PVo0a8ZPxg9wb2AWzmvplb5sQ33JSU+29TTXZBtd/lKSzyZ5WFeZ+izJIWPLr03ysjTXjJwl/zC2/DzguCQf6iJMz3XWlx4JlMYk2aOq
ThlZvjvNkLa9ZuWcJuhHzj5khP7k7Iskn66qh44sf4nmPKyPVtU/dpdMa0tftqG+5OyLJFtX1cUjy7cCtgbuWVX/3l2yfkry8Kr61MjyI2n2nXetqid3FmyRkmxaVZOuH6nVNI2+tAjsuSQPAh4JbEMz+9lFwCer6nNd5pK0tJJsRnMNsdFt/fNV9esucy1WkgB3qqqzus4CkOQOwL6s2J/HVtU5nQbT4PV9W9fy1u7L92LF9+eJZUGx2rruS4eD9liSN9Fc6PjrwOHA69vbz0/y5g6j9VKSjZO8NMlLkmyY5IAkxyY5PMkmXeeb04ecSW6b5L+SvCbJJkn+M8mZST6cZIeu881J8twkN29v3y7JN5L8OskJSe7cdb45SZ4MnArsDWxMc829vwZOadfNrCRbQDPV2QwVgC8DjqYZ/nkicFJ7+4NJDuoy23IyPvSyS0nWTfL37VDF05N8t739rDSXMJkJfdnWk9yyHUb770luluSQJGckOSbJ1l3n65skWyT5lyRPT+OVSY5L8vokN+0635wkfwOcBxwCPAR4KPAqmkvF/E2H0XpnFvrSI4E9luT7VbXzhPYA36+qnTqI1VtJjgF+SnM9s9sD59BM2fxw4JZV9aQO412vDzmTfAP4ILAZ8HfAe2gy/g3wxKq6X4fxrpfkrKrapb39aZrJlD6eZG/g0KqaiYtKJzkXuMf4kYD2w8EJk/YDXUjyl8C7gOuAp9LMFLojzUXDH1dV3+4w3vWSfB/YZXy20iTr08xw6L5zkeaK/EmrgO9W1bbTzDOfJB8Efg0cBVzYNm8L7A9sUVWP7yjaCnq0rX8O+DRNkfoE4P00+/x9gQdU1b4dxuudJJ8BzgBuAtyxvX0M8ECaoaAz0Z9JzgH2qaqfjLXfBvhMVd2xk2A9NAt96cXi++13mXBxUZoTymdmau4e2bmqHtcW0RfT/CGrJN8EvttxtlF9yLlpVb0DmhkXq+qNbfu7kzy3w1zjRveBt6iqjwNU1deSbNpRpknC5ItdX8fKk5t06d+AxwGb0HxAfGRV/U+S3YG3AjNRVNP026244TIWc7Zu12nxLqPpx9H3YbXLt+gk0WS7V9Xtx9ouBI5vvxSYFX3Z1reqqrfC9fv417Xtb01zwXitnltV1UPav+sXVtXebfs3k5zWXayVrMsNX6KM+hnNl31avM770iKw3w4A3tF+WJ17I20HXNmu0xpoC6rPzI3Jbpdn7pD5jOe8Ls1FpDcDNk6yZ1WdnOR2wDodZxv1kSRHAq8GPp7kQJqZIe8PXNBhrnGHAqcm+QLNUWBoJrN4IPB/O0u1svWq6gyAJJdV1f8AVNWpaS4bMSsOBL6c5DxW7M/bAbP0JUUf/Ai4f1WttL0k+emE+3flV0keSzM50XUASW4EPBb4VafJVtSXbX30dKL3LrBOi3Oj9mjvpsAmSXaoqp+kmRV0/Y6zjfov4KQkR3PD+3M7YD+a68Fq8TrvS4eDLgNJbklzUuncN0iXdBypl5K8Cziwqn4z1r4jcFRV3bubZCvqQ84k9wfeTvPt9TOAFwJ3pRnq8oyq+mSH8VaQ5ACaKZp3BDag2Rl/AnhdVV3RXbIVtR8QHsTItk4zWcTMfIBN8t2qumt7+5FV9YmRdWdW1cxMw98WAHMn5M/150lV9adOg/VMkucA/1NVK41CSPK8uaNFXWvPRX4dcD9uKPo2B74KHFRVP+4m2cp6sq2/Gjh8wt+h2wGHVdVjuknWT0n+FnhTu/hsmr9JBdwJeFWNXM+0a0nuyA2Tas29P4+tqrM7DdZDXfelReAyleQOVfW9rnMsF0nSh5mvZjlnmglYfuWH7OUrySOAL1XV1WPtOwL/p6oO7ybZ4iXZZPyDrZaX9uhKquoXXWeR5iRZh+Z9eW2SdYHdgJ+NXoJDWkoWgctUkguqavuuc/RNkpsAW1bVD8fa71JVp3cUayV9yTlJkgdW1Re7zrEqPcp5RlXNzEymfee+c/Wl55c0cFtfM/Ey
K1PRly/1k3y2qvbpOsdyMK2+9JzAHkvylvlW0Qxz0WpI8jia4RiXppky/ICqOqldfSSwe0fRVtCXnAt4N805LrNuZnImefR8q4BbTjPLQtpvr58GPIpm4pXrr10KvHt8Ns6uJHnRfKtoJrXRIqW5bMHBwBdoJjSA5pIGr03yqqoaP19sFrmtr6Y0l1n5W5pLrcxNTrctzWVWjq6qwzoLt/x8gdl5f873+SI0Ry61SLPQlx4J7LEkVwEvBn4/YfUbq+rmU47Ua+0MXPtU1cVJ9qI52f0VVfWxJN+pqrt1m7DRh5xJjp1vFXC/qrrxNPPMp0c5/0gzBfukHfZjqmomZjLt0TT8v6O5ruq1E1a/sKo2n26i/urRJQ3c1pdQvMzKklrFl/r7V9VNpplnPkn+RHM96kkz1d6zqmZpArCZNgt96ZHAfjsJOLOqvjW+Iskh04/Te+vOjb2vqhOT/DVwXJJtmfwHuSt9yHkfmusDjp9bFZrJOGZFX3KeDryhqs4cX5HkAR3kmU9fpuE/FfhEVZ0yviLJ0zvI02d9uaSB2/rS8jIrS+spzP+l/t9OOctCzgH+vqrOG18xY7MB90HnfWkR2G+PAa6ZtKKqbjPlLMvBlUl2nDvPrj3StjfwcWCXLoON6UPO44Grq+rr4yvaIwezoi85D6S59Mskj5pijlXpyzT8TwEun2fdntMMsgwcCpyS5IvM9iUN3NaX1oF4mZWl1Jcv9Q9h/kuAPG+KOZaDQ+i4Lx0O2mNJtqSZHOTssfZdgEur6rJukvVTkrsDG1bVN8fa7wvct6pe002yFfUhZ5KdaC6+/r9j7fcBLhqf0KYrfcnZF32Zhj/JhsCm4/vIJLcArqyq33WTrH/aSwLcqv03Os35RTQzG87ENuS2vvS8zMrSSbIFcE1VTfxiX1obvKBnv70V2HJC+7bAm6ecZTk4GJh0XbirgXtMOctC+pDz34CrJrRfww3XQpoFvciZ5PAkz5rQ/sIkr+si0yRV9ZOqenxVbQncC/iLqrpF2zYTBWDrLTTDA8c9kOY9ocV7E/Drqjq6qt5YVW+oqqNp9kdv6jTZitzWl1CSR1fVdVV1PPCVqvpIVR1vAbjG1gFWGsGVZJf2C/+ZkORFSZ42of15SQ7sIFJvzUJfeiSwx5KcVVUTh/9lxi7M3AcL9dksTc3dh5x9yAi9ynk2sOvcEMuR9hsBp8/Ktp7ktVX1ivb2zE67n+TsqrrTPOvm3a9qZT3ahvqSsy/b+qlVtfv4ba2ZJEcD7xgfrpzkQTQTwzyhm2QrSnImzbnffxhr34DmKPBduknWP7PQlx4J7Lf11nCdJttwgXWzNONVH3L2ISP0J2eNfyhsG2dt8o0Hj9yemaMWEyzUZ/5dXD192Yb6krMv23rmua01c+dJ56tW1eeBWSqsarxoaRt/j++D1dV5X/rHrt/OS/KQ8cYk+wA/6iBP352U5Bnjje3h+pVmEexQH3L2ISP0J+fV7TlNK2jbPIdk9V3aXl5lBe35tp5LvXr6sg31JWdftvWNktwtyR7Ahu3t3ef+dR2uh3rzpX6SrRbTplXrui8dDtpjSXYGjgO+xQ1/xPakORfnYVU1S1Oyz7x2w/s48AdW7M/1gUdV1SVdZRvVh5x9yAi9yrkPzTnAr2HFnC8HDqyqz3SVbVSSC4F/pfkW84Xt7etV1b9Oety0tQXgMcCRrNifTwb2q6oTOorWOz3ahvqSsy/b+lcXWF1Vdb+phVkGknwa+Pfx17d9Pzy/qvbpJtmKkjwZeD7N5SxObZv3AA6nyX9UV9n6Zhb60iKw59qxw08A5s4TOAv4gLPbrbk01927vj+r6itd5plPH3L2ISP0I2eSXYGXcEPOM2muJ3ZGd6lWlOTghdZX1aumlWVV2plAn8OK+863VdWl3aXqrz5sQ9CPnH3Y1rW0+vSlfluYHkTz/iyafedhVfXZToP1UNd9aREoSZKk1ZbklqNHUZNsDfyyPa9Jq8Ev9TVtnhO4
DCQ5Ymz5qCTvaL9NlLRMJHnm2PKzkzw+ybpdZZokycPGlvdNMiuXL7lexi7CnOS1SV6W5GYdRZKA/mzrwLvHlt8HfC/JG7oI02dV9fuqek9Vvbj991+zWgD2ZR/fB132pUXg8vDOseW3AV8CntRBFklrz/iMYQHuDXysgywLufvY8j2Af0oya8OFxicEORG4Fq8VqO71YluvqoeOLT8AuC3wnm4S9VuPvtTvyz6+DzrrS4eDSpIkabW1E+5sQ3M+00VV9fOOI/Vakj2q6pSR5bsD2wN7VdXLukum5cgisMeSbAw8l2bn+1ZgP+DRwPeAV1fVbzqMJ2ktSXJvYC/gzKr6Qtd5RiXZjOZ6gdd/MAQ+X1W/7jLXqCSPAr5eVb9MsiXwRuBuwNnAi6vqwk4DarCSrE/zt/yiqvpSkicAfwGcAxxRVX/sNGAryW7AfwCbAT9rm7cFfg08u6pOnfxILSdJbkO776yq73Wdp8+66EuHg/bbkcBWwG2AT9PMJPUGmmEj7+gulqSllOTEkdvPoBnyvSlwcJKDOgs2pp3y+lRgb2Bj4MbAXwOntOtmxaFV9cv29tuA7wD7AJ/FYWzq1nuAhwIvSPI+4LHACTRDxt7VZbAxRwIvqKo7VtUD2n93AA7EbWi1Jdk4yUuTvCTJhkkOSHJsksOTbNJ1vjlJPjFye1/gK8DDgWOTHNBRrF6ahb70SGCPJTmtqnZLEuBiYOuqqnb5u1V1l44jSloCSb5TVXdrb58EPKSqLktyY+D4qrpztwkbSc4F7jF+1C/JTYETqmrnToKNSXJuVd2+vX1KVe0xsu60qtqts3AatCSnV9Vd2glgfgbcqqr+NGt/15OcV1UrXdS+XfeDqrrdtDP1WZJjgJ8CGwG3pznyewxNUXDLqpqJOR7G/hZ9C3hiVf04yc2BL1fVXbtN2B+z0JezNsuU1kBb+H2m2oq+Xba6l5aPG7WF1I1ovry7DKCqfpvk2m6jrSA0Q0DHXcfKE1106WtJXg38v/b2I6vqE+015K7oOJuG7UbtkNAb0xxN3wz4JbABsF6XwcZ8tr3A+XtpiheA7YAnA5/rLFV/7VxVjxv5Uv8B7We5bwLf7TjbqNH9+7pV9WOAqvpFkus6ytRXnfelRWC/nZxkk6r6TVU9da4xyY7AVR3mkrS0NqOZyTJAzV2bqx0mNEvF1aHAqUm+wA0fDLcHHgj8385Srey5wCuBc9vlFyb5LfApnFVZ3Xo3zXn969C8Rz+c5EfAPYGjuww2qqqe317oel+a838DXAj8e1V9ptNwPdaDL/XvmuRKmtd7g5G/RevTvGe1eJ33pcNBl6kkKV9caVlrJ4faau4bxFnQHrF8ECt+MPx8Vf2q02DzaCeyWbeqLu86iwSQ5FYAVXVRks2BBwAXVNWJCz5QvZXkXcCB4xP6tV/qH1VV9+4m2eK079M7VtW3u87Sd9PsS4vAnktyE2DLqvrhWPtdqur0jmJJWgucjn3tS3IHZ7nTLJob+dN1jlVJckRVPXPV99RizOqX+km2oDlYOZNf8PVJV33p7KA9luRxNMNGPprkrPZ6MnOO7CaVpKWW5G5Jjge+BhwOvB74epLjk+zeabhFSnJG1xkWaaYuuSGNOLvrAHOSbDHPv5sBD+k6Xx8luUl75G/cTEz8BZBk+yRHJ7mMZtbak5Jc2rbt0HG8XpmFvvScwH57BbBHVV2cZC/gfUleUVUfY7bOE5L053kP8PdVdcJoY5J7tutmYka2JI+ebxVwy2lmWUiSt8y3Cth8ilGkFSR50XyrgJm5VABwGXA+K37WqHb5Fp0k6rH2S/03AZcmWQ84oKpOalcfCczKl30fosn5xKr6E0CSdWguZXI0zbmrWpzO+9LhoD2W5Myq2nVkeWvgOOAomh3IrOw0JP0Z+jIde5I/Au9n8gyhj6mqTaccaaIkVwEvBn4/YfUbq+rmU44kAZDkdzRH+ifN+vvCqtp8uokmS3IecP+qumDCup9W1XYd
xOqtJKcB+4x8qf9e4BVV9bHRSwl0bRV/i+Zdp5XNQl96JLDfrkyy49z5gO3OY2/g48AuXQaTtKT6Mh376cAbqurM8RVJHtBBnvmcBJxZVd8aX5HkkOnHka53KvCJqjplfEWSp3eQZz5vAm4KrFQE0gxZ1+pZt6ouBqiqE9vL1RyXZFsmf6nWlVOSvJ3mYMPo36L9ge90lqqfOu9LjwT2WHsO4IZV9c2x9vsC962q13STTNJSm2c69mNnaTr2JPcBzp/n6MCeVXVyB7FW0p6Ef01VXdN1FmlUktsDl1fVLyas28rJoJanNBcLf9LoJH9JNqX5Uv8+VbVBZ+FGtJcveBor/i36Kc3ldd5dVZNGV2iCWehLi8AeS3IczXCB08fa9wQOrqqHd5NM0lJKsiGw6dxF4kfabwFcWVW/6yZZPyXZkmZW5bPH2ncBLh3vZ2la+rKtJ/k7ms+Q7xtrfwbw26r6QDfJ+skv9dUFZwfttx0mXQai/bZ9h+nHkbSWvAW4z4T2BwL/NuUs80pyeJJnTWh/YZLXdZFpHm8FtpzQvi3w5ilnkUb1YlunOaf2ExPaj27XafUcDFwxof1q4B5TzjKvHu3jZ94s9KVHAntsoQkhZmmyCEl/niRnV9Wd5ll3VlXNxDnASc4Gdq2q68babwScPjqRVZcW6rPxCbekaerRtn56Vd1ldddpsoX2O0nOqKqZuExEX/bxfTALfemRwH47qR16sYIkTwNWOqlcUm8tdMmXWdqP1/gftLbxOmbrsjXrreE6aW3ry7a+XpIbjze257Gt30GevttwgXUbTS3FqvVlH98HnfflLO1QtPoOBJ6S5GtJ3tj++zrwdOAF3UaTtIQubacNX0F7Hsksnb92dZKVprVu22ZpEpbzkqx0Qet28p0fdZBHmtOXbf3dwEdGL2rd3j66XafV05cv9fuyj++DzvvS4aDLQDuV8Nxh47Oq6itd5pG0tNoPhcfQXDR47gPBnjSXiNhv/CLyXWmLqLcCr2HFnC8HDpyVmUyT7ExzTdVvsWLOewEPq6rvd5VNw9aXbR2gPZ/p5TQXsS/gt8BhVfWOToP1UJKtaGYC/QMrvu7rA4+qqku6yjaqL/v4PpiFvrQIlKQeaGcHfA4jX/gAb6uqS7tLtbIkuwIv4YacZ9JcO/CM7lKtLMkGwBNYsT8/MCuzL2q4+rKtz0myCc3nyau6ztJ3ffhSvy/7+D7oui8tAiVJkrTakuxeVaeOLO8JXFxVP+swlqRF8JxASeqJJIeMLb82ycuS3KyjSBMleebY8rOTPD7Jul1lmiTJEWPLRyV5R/vtrNSZvmzrwD+MLT8POC7Jh7oIo+noyz6+D7rsS4tASeqP8QkCTgSuZbauHwYrz2wW4N7AxzrIspB3ji2/DfgS8KQOskijerGtV9Uzxpb3r6q70UxQp+WrL/v4PuisLx0OKkmSpDXWnhe4M/Cjqvp1x3EkLYJHAiVpxiV5VJIt2ttbJnlvkjOSfCjJtl3nm0+Seyd5UZK/6TrLqCQbJ3lpkpck2TDJAUmOTXJ4+2FW6kRftvUkbx+5fW/gbOCNwBmTLr+i5S/JU7rO0CdJnp9ku04zeCRQkmZbkrOr6k7t7Q8BxwMfBh4APLGqHthlvjlJTqyqvdrbz6CZ4fDjwN8An6qqw7rMNyfJMcBPaS7CfHvgHJpp+R8O3LKqHA6qTvRoWz+1qnZvb38VeHFVnZrktsAxVbVntwk1bUkuqKrtu87RF0muoLmsyg+BDwIfrqqpXgvUIlCSZlySc6vq9u3tU6pqj5F1p1XVbp2FG5HkO+35QCQ5CXhIVV2W5MbA8VV1524TNub6LEmAi4Gtq6ra5e9W1V06jqiB6tG2PloEjue8fj+g5SXJ6fOtAnauqg2mmafPknwH2IPmC57HA4+gORf4g8DHpnHJFYeDStLs+1qSVyfZqL39SLj+mlJXdJpsRTdKctN2BsPMfatZVb+lmdRiplTzLehn2v/nlv1mVF3q
y7Z+hySnJzkD2DnJTQGS3AhYr9toWou2Ap5MM2pi/N/lHebqo6qq66rqC1X1NOBWwNuBBwM/mkYAp3KVpNn3XOCVwLnt8guT/Bb4FLM1k+VmNN9kBqgkt6yqS+YuJt1ttBWcnGSTqvpNVT11rjHJjoAXvFaX+rKt33Fs+bft/1sA/zLlLJqe44BNquq08RVJvjb1NP22wt/EqvojcCxwbPsl0NoP4HBQSeqPJJsB61ZVb751TbIxsFVV/bjrLKuSJOUfRs2APm7rkhYnyc5V9f0uMzgcVJJ6pKquGP1QmOQOXeaZJMlWSXZPcrckW1XV1bNWACa5SXvkb9xMnLcozfK2nmSzJIcl+V6Sy9t/57Rtm3edT9PnzMqrZ6ECcFp9aREoSf32ha4DzGmLvuOBrwGHA68Hvp7k+CS7dxpuRJLHAd8DPprkrCR3H1l9ZDeppFWamW2dZjbdXwF7V9XNqupmwF+3bR/uNJm6cnbXAZaRqfSl5wRK0oxL8pb5VgGbTzHKqrwH+PuqOmG0Mck923V37STVyl4B7FFVFyfZC3hfkldU1ceYrXMXNTA92tZ3qKrXjTZU1SXA65I8dZ7HqOeSvGi+VYBHAlfDLPSlRaAkzb6nAC8Gfj9h3d9OOctCbjxeAAJU1fHtZSJmxbpVdTFAVZ3Yzrx4XHsxbs8HVJf6sq2fn+SlwFFV9XNohoEDB9Bcg1PL02tpRnhMmu3Z0YWrp/O+tAiUpNl3EnBmVX1rfEWSQ6YfZ16fTfJp4L3c8EFwO5opxT/XWaqVXZlkx6r6IUB7RHBvmgvb79JlMA1eX7b1xwMH0Qz3vkXb9nOa2Q0f11kqrW2nAp+oqlPGVyR5egd5+qzzvnR2UEmacUm2AK6pqmu6zrIqSfYB9gW2oRnWciFwbFV9ptNgI9pzADesqm+Otd8XuG9VvaabZBq6Pm3rGp4ktwcur6pfTFi31dxRYa3aLPSlRwIlafatA9yGsZPFk+wCXDp3UfauJdkQOLmqPjvWfoskG1bV7zqKNu5gmvMCx10N3GPKWaRRfdnWXwRcUVXvHmt/HrBOVb2pk2Ba284HNh1vbI8GXzH9OL3WeV86fleSZt9bgS0ntG8LvHnKWRbyFuA+E9ofCPzblLMsZIeqOn28sapOBnaYfhzpen3Z1p8KvG9C+xHtOi1PfdnH90HnfelwUEmacUnOqqqJ56olObOqdp12pkmSnF1Vd5pn3by/w7Ql+UFV3W5110lrW4+29TOqauI1NRdap37ryz6+D2ahLz0SKEmzb701XDdtC11eYZb+3pyU5BnjjUmeBqx0kr40RX3Z1udmA11lm5aVvuzj+6DzvvQFk6TZd16Sh4w3tpOw/KiDPPO5tL3u3graiVhm4lym1oHAU5J8Lckb239fB54OvKDbaBq4vmzrrwc+neSvkmza/tsb+BTwhk6TaW3qyz6+DzrvS4eDStKMS7IzcBzwLW44UrUncC/gYVX1/a6yjWr/oB0DHMmKOZ8M7DfpGoJdaq8PODe87qyq+kqXeaS+bOtwfWF6EM02VMBZwGHjE0Np+ejbPn6WzUJfWgRKUg8k2QB4AiNFC/CBGZpxE7h+ZrPnsGLOt1XVpd2lkvqjL9u6hsl9/NLpui8tAiVJkrTakjysqo4bWd4XuMQjQtLs85xASeqJJEeMLR+V5B1JZmLGwDlJDhlbfm2SlyW5WUeRpF7py7YO3H1s+R7APyVxSOgy5j5+6XTZlxaBktQf7xxbfhvwJeBJHWRZyPgMmycC1+J1pKTF6sW2XlUHjy2/oqoeXlX7dJVJU+E+ful01pcOB5UkSdKiJXkE8AXPU5T6yyOBkjTjkmyc5KVJXpJkwyQHJDk2yeFJNuk635wkj0qyRXt7yyTvTXJGkg8l2bbrfNKs68u2DnwIuDDJ+5I8JMk6XQfS2uc+funMQl9aBErS7DsS2Aq4DfBpmmmk30Bzsdl3dBdrJYdW1S/b228DvgPsA3wWeE9nqaT+OJJ+
bOvfA3YCvgG8GLgoyX8k+atuY2ktcx+/dDrvS4eDStKMS3JaVe2WJMDFwNZVVe3yd6vqLh1HBCDJuVV1+/b2KVW1x8i606pqt87CST3Qo2391KrafWT5lsDjgL8Ftq2q7ToLp7XGffzSmYW+9EigJPVENd/afab9f255lr7J+1qSVyfZqL39SLj+ouxXdJpM6pEebOsZXaiqS6rqLVV1L+DeHWXS2uc+ful03pcWgZI0+06eOx+oqp4615hkR+CqzlKt7LnAdcC5wGOBjyW5CngGMzaroTSj+rKtv3C+FVV1/jSDaKrcxy+dzvvS4aCS1GNJUjO4I0+yGbBuVV3edRZpOZjVbV3D5D5+6XTVlx4JlKQeSHKT9mjAuDtPPcwiVNUVo3/QktyhyzxSX/RtWx+X5IyuM2jtcx+/dLrqS48EStKMS/I44E3ApcB6wAFVdVK7boUJGmZVkguqavuuc0izrC/bepJHz7cK+I+q2nKaedQ99/FLZ1p9ue7a/gGSpD/bK4A9quriJHsB70vyiqr6GGMTNHQpyVvmWwVsPsUoUl/1YlunuU7g+5k8Wc2GU86iKXEfv3RmoS8tAiVp9q1bVRcDVNWJ7exhx7UXlJ2l4RxPoblm2O8nrPvbKWeR+qgv2/rpwBuq6szxFUke0EEeTYf7+KXTeV9aBErS7LsyyY5V9UOA9ijB3sDHgV26DDbmJODMqvrW+Iokh0w/jtQ7fdnWDwSunGfdo6aYQ9PlPn7pdN6XnhMoSTMuyd2BDavqm2Pt9wXuW1Wv6SbZipJsAVxTVdd0nUXqo75s6xom9/FLZxb60tlBJWn2Hczki8deDdxjylkWsg5wm/HGJLskcaIIadV6sa0nOTzJsya0vzDJ67rIpKlwH790Ou9Li0BJmn07VNXp441VdTKww/TjzOutwKQ/XtsCb55yFqmP+rKtPww4YkL7m4GHTjmLpsd9/NLpvC8tAiVp9i00295GU0uxaneuqq+PN1bV54G7dJBH6pu+bOtVVddNaLyO2ZrFVEvLffzS6bwvLQIlafadlOQZ441Jngac0kGe+ay3huskNfqyrV+dZKfxxrbN88WWL/fxS6fzvnR2UEmafQcCH0/yRG74ILgnsD6zNRPfeUkeUlWfGW1Msg/wo44ySX1yIP3Y1v8F+GyS17BizpfT/A5antzHL53O+9LZQSWpJ9prhu3aLp5VVV/pMs+4JDsDxwHfYsUPhvcCHlZV3+8qm9Qns76tAyTZFXgJN+Q8k+bagWd0l0prk/v4pTMLfWkRKElaMkk2AJ7AyAdY4ANV9bvuUkmSloL7+KXTdV9aBEqSJGm1JXlmVR0xsvxs4HLgo1V1bXfJJK2KE8NIkpZUkiPGlo9K8o52+Jik5WN8JtAA9wY+1kEWTYn7+KXTZV96JFCStKSS7FFVp4ws3x3YHtirql7WXTJJ0p/LffzS6bIvLQIlSZK0WpLsRXO9wJOS3Al4MPC98dkOJc0mi0BJ0pJIsjHwXKCAtwL7AY8Gvge8uqp+02E8SUskycHAPjSXGvsicA/ga8ADgM9X1aHdpdPa4j5+6cxCX1oESpKWRJJjgJ8CGwG3B84BjgEeDtyyqp7UYTxJSyTJGcBuwAbAJcC2VXVlko2AE6rqLl3m09rhPn7pzEJfWgRKkpZEktOqarckAS4Gtq6qape/6wdDaXlI8p2qutv47Xb5tKrarbNwWmvcxy+dWehLZweVJC2par5d/Ez7/9yy3zhKy8cf2uFsAHvMNSbZDLium0iaFvfxS6fLvrQIlCQtlZOTbAJQVU+da0yyI3BVZ6kkLbX7VtXVAFU1WvStB+zfTSRNgfv4pdN5XzocVJK01iVJ+QdHWvaSbOIEIcPjPn7pTKsvPRIoSVoySW7SfpM57s5TDyOpC2d3HUBrj/v4pdN1X647jR8iSVr+kjwOeBNwaZL1gAOq6qR29ZHA7h1Fk7SEkrxovlXAJtPMoulxH790ZqEvPRIoSVoqrwD2
aGcGfArwviSPbtels1SSltprgZsCm4792wQ/Wy5n7uOXTud96ZFASdJSWbeqLgaoqhOT/DVwXJJtceY4aTk5FfhEVZ0yviLJ0zvIo+lwH790Ou9Lv62RJC2VK0fPb2j/wO0NPALYpatQkpbcU4Dz51m35zSDaKrcxy+dzvvSI4GSpKXyAuBWwA/nGqrqqiSvBu7bWSpJS+18muGfK0hyC+CK6cfRlLiPXzqd96VHAiVJS+VgJn8AvBq4x5SzSFp73gLcZ0L7A4F/m3IWTY/7+KXTeV96nUBJ0pJIcmZV7TrPujOqyinEpWUgydlVdad51p1VVQ4NXIbcxy+dWehLjwRKkpbKhgus22hqKSStbQvNXuhny+XLffzS6bwv3VAlSUvlpCTPGG9M8jRgpVkEJfXWpUn2Gm9Mcnfgsg7yaDrcxy+dzvvS4aCSpCWRZCvg48AfuOGP2J7A+sCjquqSrrJJWjptAXgMzUWtR7f1JwP7VdUJHUXTWuQ+funMQl9aBEqSllR7vaO5cx3OqqqvdJlH0tJrZwJ9DiPbOvC2qrq0u1SaBvfxS6fLvrQIlCRJkqQB8ZxASZIkrbYkh4wtvzbJy5LcrKNIkhbJIlCSJElrYnwCixOBa/FagdLMczioJEmSJA2IRwIlSZK0aEkelWSL9vaWSd6b5IwkH0qybdf5JK2aRaAkSZJWx6FV9cv29tuA7wD7AJ8F3tNZKkmL5nBQSZIkLVqSc6vq9u3tU6pqj5F1p1XVbp2Fk7QoHgmUJEnS6vhaklcn2ai9/Ui4/ppnV3SaTNKieCRQkiRJi5ZkPeCVwFPbpm2B3wKfAg6qqgu6yiZpcSwCJUmStEaSbAasW1WXd51F0uI5HFSSJElrpKquGC0Ak9yhyzySFscjgZIkSVoSSS6oqu27ziFpYet2HUCSJEn9keQt860CNp9iFElryCOBkiRJWrQkVwEvBn4/YfUbq+rmU44kaTV5JFCSJEmr4yTgzKr61viKJIdMP46k1eWRQEmSJC1aki2Aa6rqmq6zSFozzg4qSZKk1bEOcJvxxiS7JNmygzySVpNFoCRJklbHW4FJxd62wJunnEXSGnA4qCRJkhYtyVlVtcs8686sql2nnUnS6vFIoCRJklbHemu4TtKMsAiUJEnS6jgvyUPGG5PsA/yogzySVpPDQSVJkrRoSXYGjgO+BZzSNu8J3At4WFV9v6tskhbHIlCSJEmrJckGwBOAufP/zgI+UFW/6y6VpMWyCJQkSZKkAfGcQEmSJK22JEeMLR+V5B1JnB1UmnEWgZIkSVoT7xxbfhvwJeBJHWSRtBocDipJkiRJA+KRQEmSJC1ako2TvDTJS5JsmOSAJMcmOTzJJl3nk7RqHgmUJEnSoiU5BvgpsBFwe+Ac4Bjg4cAtq8rhoNKMswiUJEnSoiU5rap2SxLgYmDrqqp2+btVdZeOI0paBYeDSpIkabVVcyThM+3/c8seXZB6wCJQkiRJq+PkuXP/quqpc41JdgSu6iyVpEVzOKgkSZKWRJKUHy6lmeeRQEmSJK2WJDdpj/yNu/PUw0habRaBkiRJWrQkjwO+B3w0yVlJ7j6y+shuUklaHRaBkiRJWh2vAPaoqt2ApwDvS/Lodl06SyVp0dbtOoAkSZJ6Zd2quhigqk5M8tfAcUm2xdlBpV7wSKAkSZJWx5Wj5wO2BeHewCOAXboKJWnxPBIoSZKk1fEC4FbAD+caquqqJK8G7ttZKkmL5pFASZIkrY6DgSsmtF8N3GPKWSStAYtASZIkrY4dqur08caqOhnYYfpxJK0ui0BJkiStjg0XWLfR1FJIWmMWgZIkSVodJyV5xnhjkqcBp3SQR9JqSpUz+UqSJGlxkmwFfBz4AzcUfXsC6wOPqqpLusomaXEsAiVJkrTa2usD7tounlVVX+kyj6TFswiUJEmSpAHxnEBJkiRJGhCLQEmSJEkaEItASdIaSfKbkds7J/lMkh8kOSfJMUm2SrJ3kiuSfCfJ
uUm+keRhCzznoUl+OvrcbfsGST7UPv8JSXaY5/F/SnJakrOSfDfJi5LcqF23Z5K3LNGv/2dLsk+Sk9v++l6SN7TthyT5xyX8Od8auf36tm9en+RZSZ68Bs+3eZJnjyzfKslHliqvJGntW7frAJKkfkuyIfBp4EVV9am27a+BLdu7fLOqHta27wZ8Isk1VfXlCU/3KeBtwHlj7U8DflVVt0uyH/A64PETHn9NVe3W/qxbAB8ANgMObi9kffIa/6JLKMmuNL/nQ6vqe0nWBZ65Nn5WVf3FyOLfA1tW1e//jKfcHHg28Pb2+S8CHvNnPJ8kaco8EihJ+nM9Afj2XAEIUFVfraozx+9YVacBrwaeO+mJqur4qrp4wqp9gaPa2x8B7p8kC4WqqktpCqvnprF3kuMAkvxVe8TwtPYo5aZt+0uSnJTk9CSvmnuuJJ9Ickp7FO2Zbds6SY5McmaSM5K8sG3fMcnn2vt/M8kdJsR7KXBoVX2vzXptVb19/E5JntHm+W6SjybZuG1/bPtzv5vkG23bLklObH+n05Ps1Lb/pv3/WODGwAlJHj96xDHJ7ZJ8qX2+U9vfYZMkX26Xz0iybxvrMGDH9ue8PskOSc5sn2fDJO9p7/+d9ssAkhyQ5GNtv5yX5PCFXjtJ0trlkUBJ0p9rV1bvAtGnAi9ZzZ+xDfBTaAqmJFcANwN+sdCDqupH7XDQW4yt+kfgOVX1v0k2AX6X5G+AnYC9gADHJrlvVX0DeGpV/TLJRjQXyv4osAOwTVXtCs0wyfa5jwCeVVXnJbkHzRGz+439/F2BNy7i9/5YVf1n+/yvoTki+lbgX4AHVdXPRn7us4A3V9X7k6wPrDPWF49I8puRI6WHjKx+P3BYVX28PbJ7I5prwD2qqq5McnPg+LaQPAjYdeR5dhh5nue0P+vObfH7hSQ7t+t2A+4G/B44N8lbq+qni+gDSdISswiUJE3bgkfwVuMxi73G0aTH/i/wr0neT1NoXdgWgX8DfKe9zyY0ReE3gOcneVTbvl3bfi5w2yRvpRkO+4W2oPwL4MMjByo3WGTOSXZti7/N2zyfH8l/ZJJjgI+1bd8GXplk2/Z3Gh9SO1F7FHSbqvo4QFX9rm1fD3htkvsC19EU4lut4unuTVOk0g5zPR+YKwK/XFVXtM99NnBr2sJekjRdDgeVJP25zgL2WI373w04px1OOTck89WreMyFNMUX7flzmwG/XNUPSnJb4E/ApaPtVXUY8HRgI5ojXHegKRb/X1Xt1v67XVW9O8newAOAe1XVXWmKxA2r6lfAXYGv0RwBexfN39VfjzzHblV1xwnRFttnRwLPrao7A68CNmzzPwv4p7ZPTktys6r6APAI4Brg80nGjz7OZ76i/Ik053Xu0R71+/ncz1+D54LmCOCcP+EX0ZLUGYtASdKf6wPAXyR56FxDkgcnufP4HZPcBfhn4N+r6k8jhdK/rOJnHAvs395+DPCVqlrwSGCSLYH/AN42ft8kO1bVGVX1OprJYu5Ac5Ttqe3RPJJsk2Zymc1oJqW5ui0W79muvzlwo6r6aPs77V5VVwI/TvLY9j5JctcJ8V4PvGJuqGSSGyV50YT7bQpc3B6Ve+JY/hPafvsFsF1b8P6oqt7S9tddFuqfOW3mC5M8sn3uDdpzDzcDLq2qP7bn9t26fchVba5JvjGXs/3dtqc5YipJmiF+CydJ+rNU1TVpLvvwpiRvAv4InA68gOa8vfsk+Q6wMc0RuefPMzMo7YQhTwA2TnIh8K6qOgR4N/C+JD+gOQK43zxxNkpyGrAecC3wPuBfJ9zvwLaw+RNwNvDZqvp9kjsC326Hcv4G+Dvgc8CzkpxOU9Ac3z7HNsB72nMOAV7e/v9E4B1J/qnNcTTw3bE+Oz3JgcAH24KraIaUjvtn4ATgfOAMbii+Xt9O/BLgy+3zHwT8XZI/ApfQTMCzWE8C3tkekf0j8Fia8wQ/leRk4DRgbhKby5P8bzsZzGeBfx95nrcD
/5HkDJr+P6Dt19WIIkla27KKL1IlSZIkScuIw0ElSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQCwCJUmSJGlALAIlSZIkaUAsAiVJkiRpQP4/UKvJkgCnrIkAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1080x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "# Plot the frequencies and unique Disease_ID counts\n",
    "plt.figure(figsize=(15, 8))\n",
    "\n",
    "# We'll use a colormap to have nice color gradations\n",
    "cmap = plt.get_cmap(\"tab20c\")\n",
    "\n",
    "# Increase global font size\n",
    "plt.rcParams['font.size'] = 10\n",
    "# Compute the frequency of each 'diseaseClass' in CF_df\n",
    "counts = CF_df['diseaseClass'].value_counts()\n",
    "# Select the top 20 diseases\n",
    "top_20_diseases = counts[:20]\n",
    "\n",
    "top_20_diseases.plot(kind='bar')\n",
    "plt.title('Distribution of disease containing both C and F (top 20)')\n",
    "plt.xlabel('ICD-10 Disease Classification', fontsize=10)\n",
    "plt.ylabel('Count', fontsize=10)\n",
    "plt.xticks(rotation=90)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "id": "49f05f54-2c6a-4320-bab5-55e8f8e0c2ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2690"
      ]
     },
     "execution_count": 283,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(CF_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "id": "8b684e43-1476-4a4d-84a9-f1b8cb8f0b18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52476"
      ]
     },
     "execution_count": 280,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(merged_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "671c3f9a-8310-46d8-9c45-ba05621a3235",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select all rows whose 'diseaseClass' contains the letter 'C'.\n",
    "# na=False makes rows with a missing class count as non-matches.\n",
    "C_df = merged_df.loc[\n",
    "    merged_df['diseaseClass'].str.contains('C', na=False)\n",
    "]\n",
    "\n",
    "# Write the selection to disk without the index column.\n",
    "C_df.to_csv('C_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "15e2430b-d76e-4074-877f-85024c28aab0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "87883"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in C_df (rows whose 'diseaseClass' contains 'C', per the previous cell).\n",
    "len(C_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4df0890c-db23-427f-a65c-6b19ad65f86b",
   "metadata": {},
   "source": [
    "### F01\n",
    "Vascular dementia\n",
    "Vascular dementia is the result of infarction of the brain due to vascular disease, including hypertensive cerebrovascular disease. The infarcts are usually small but cumulative in their effect. Onset is usually in later life.\n",
    "\n",
    "Incl.:\n",
    "arteriosclerotic dementia\n",
    "F01.0\n",
    "Vascular dementia of acute onset\n",
    "Usually develops rapidly after a succession of strokes from cerebrovascular thrombosis, embolism or haemorrhage. In rare cases, a single large infarction may be the cause.\n",
    "\n",
    "F01.1\n",
    "Multi-infarct dementia\n",
    "Gradual in onset, following a number of transient ischaemic episodes which produce an accumulation of infarcts in the cerebral parenchyma.\n",
    "\n",
    "Predominantly cortical dementia\n",
    "F01.2\n",
    "Subcortical vascular dementia\n",
    "Includes cases with a history of hypertension and foci of ischaemic destruction in the deep white matter of the cerebral hemispheres. The cerebral cortex is usually preserved and this contrasts with the clinical picture which may closely resemble that of dementia in Alzheimer disease.\n",
    "\n",
    "F01.3\n",
    "Mixed cortical and subcortical vascular dementia\n",
    "F01.8\n",
    "Other vascular dementia\n",
    "F01.9\n",
    "Vascular dementia, unspecified"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c843a770-1427-49ef-9776-d31788e91c37",
   "metadata": {},
   "source": [
    "### F03\n",
    "Unspecified dementia\n",
    "Incl.:\n",
    "Presenile:\n",
    "dementia NOS\n",
    "psychosis NOS\n",
    "Primary degenerative dementia NOS\n",
    "\n",
    "Senile:\n",
    "dementia:\n",
    "NOS\n",
    "depressed or paranoid type\n",
    "psychosis NOS\n",
    "Use additional code, if desired, to indicate delirium or acute confusional state superimposed on dementia.\n",
    "\n",
    "Excl.:\n",
    "senility NOS (R54)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 248,
   "id": "ef54553b-34aa-4791-8dc0-2afea39562aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select all rows whose 'diseaseClass' contains the letter 'D'.\n",
    "# na=False makes rows with a missing class count as non-matches.\n",
    "D_df = merged_df.loc[\n",
    "    merged_df['diseaseClass'].str.contains('D', na=False)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 249,
   "id": "d95528eb-bef7-4b9a-a927-6b3e9471f7f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 249,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in D_df (rows whose 'diseaseClass' contains 'D', per the previous cell).\n",
    "len(D_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f482b7a-4a36-4931-8080-964d7766af5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows of merged_df whose 'diseaseClass' contains the letter 'C'; missing values are excluded (na=False).\n",
    "C_df = merged_df[merged_df['diseaseClass'].str.contains('C', na=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "id": "743614c5-4454-4b33-a60f-a77f3704b5c7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "47058"
      ]
     },
     "execution_count": 254,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count of C_df as currently bound in the kernel (several cells in this notebook rebind C_df).\n",
    "len(C_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "id": "1832c72f-6e7a-4709-8de9-f9a0ad025eab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose 'diseaseClass' contains the letter 'C'; rows with a missing\n",
    "# class are treated as non-matches (na=False).\n",
    "C_df = merged_df.loc[\n",
    "    merged_df['diseaseClass'].str.contains('C', na=False)\n",
    "]\n",
    "\n",
    "# Persist the selection as CSV, omitting the index column.\n",
    "C_df.to_csv('C_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89921e8d-0211-4fd0-b515-da7ec7392de6",
   "metadata": {},
   "source": [
    "### C04\n",
    "\n",
    "Malignant neoplasm of floor of mouth\n",
    "C04.0\n",
    "\n",
    "Anterior floor of mouth\n",
    "Anterior to the premolar-canine junction\n",
    "\n",
    "C04.1\n",
    "Lateral floor of mouth\n",
    "\n",
    "C04.8\n",
    "Overlapping lesion of floor of mouth\n",
    "\n",
    "C04.9\n",
    "Floor of mouth, unspecified"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "id": "aa0c8fc7-9a44-4887-abd6-9b8f42eae0af",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Narrow the selection to rows whose 'diseaseClass' contains 'C04'\n",
    "# (missing values are treated as non-matches).\n",
    "# NOTE: this rebinds C_df, replacing the broader 'C' selection made by earlier cells.\n",
    "C_df = merged_df.loc[\n",
    "    merged_df['diseaseClass'].str.contains('C04', na=False)\n",
    "]\n",
    "\n",
    "# CSV export is deliberately left disabled here:\n",
    "#C_df.to_csv('C_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "id": "274e4b55-9de7-42e0-8ca1-d66b8c8adb92",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18922"
      ]
     },
     "execution_count": 289,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the 'C04' selection that the previous cell assigned to C_df.\n",
    "len(C_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1f4e0157-9bb1-46bf-8c66-20346efdf032",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6AAAAGSCAYAAAD96Kw+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABE+0lEQVR4nO3dd7wkVZ3//9cbhiCGFRATLs4oa8B1dVd05asiYECCYMQcYMWfrGvCxMiqqCCsYlh1UcwouiJGEFcRBUygooIyKiZGRFGBASXHz++Pqss0PX3v7b7TXXfmzuv5ePSjp6tOnfpUdd8z/elz6lSqCkmSJEmSJm29+Q5AkiRJkrRuMAGVJEmSJHXCBFSSJEmS1AkTUEmSJElSJ0xAJUmSJEmdMAGVJEmSJHXCBFSSNKMkByWpJDvMw75PSVJ9y+YtnuliWpskeU6SnyS5oj2PL53vmCYtyeL2WD8637FI0rrOBFSS1gE9X8B7H1ck+UOSryX5zyR3mcB+d2j3ddC4656UtTHmYSV5KPBRYCPg3cAbgNNn2WZ5ez5+n2TjAeunPltfmEDIkqQFZtF8ByBJ6tQ5wKfaf28M3BHYDngT8J9JllbVO/q2eU+7zXmdRbnSs4FN5mG/M1kTYxrWLu3zc6pqxsRzgLsALwTeNt6QJEnrEhNQSVq3/KKqDupfmGQ34CPA25NcXlUfmFpXVRcBF3UX4kpVNR9J74zWxJhGcKf2+U8jbncl8DdgaZIPVNXfxhuWJGld4RBcSRJVdQLwhPbloUluObVuumsuk+yV5NtJLkpyVZLfJflCO8yTdgjryW3x1/cO/+2p45R22S2SHNYO97w+yXN7108Xd5L9kvw8ydVJzm2HEi/qKzPtNaP960aJeUBdWyR5d3serk1yQZKPJlk8oOzy9nHrJO9qy16d5AdJHjnd8U5zDha3+7mg3e/v2jpv11NmhzbmvdtF5/Yf1yyuAw4GNgdeOUJst0lycJJftMd3cfsZud805bdP8pUkl7SfqZ8mecWA9/SmYdJJdkzynXZI+YVJPtR77LPEt1OSjyT5Zbv9ZUm+m+Qp05R/ZDtk/U/t8Zyf5KtJ9ugpc9P1pkm2SfLlJH9r/04+MPW3lWTXJKcnuTLNUPilA/a3cZJXtefhsraec9qYtxzmGCVpTWMPqCQJgKr6dpJTgYcDjwCOm65skhfSDM39Dc3w3MuBO7fb7gB8GzgFWAw8Bzi1fT2dzwH3Br4KXAX8eYiQXwE8rN3/l4A9aIYS/0O7z7k4heFjvkmSLWiupbwb8DXgk8A9aIbr7pbkIVX1y77NNgBOBG4DfBrYFHga8OUk21bVT4bY7z1pzvXmwBdohlj/C/Cidr8PrqoLgeU013s+Drgf8N/ApcMcW4/3A/sDL0vy7qr6yyyx3Q74Js37ejJwQhvnE4FHJXlkVZ3WU34vmvN2JXAMcAmwK/BW4KFJHl9V/QnzdsBSms/qqe3rfYAHJ3lQVV0xyzG9iuY9+x7wB2Azms/Rp5Lcqare2RPf7u1+/gR8EVhBM4T9X9tt+v9elgDfoflcvB/YHngecJskx9Jci/uFdv3jgTcn+WNVHdVTx8eBJ7X1vB+ott4nAB9qY5aktUtV+fDhw4ePBf6gSaoK+MIs5d7Qlntjz7KD2mU79Cz7EXA+sEnf9gE263m9Q7vtQdPs75R2/Q+Av5tufd+yqXiuBO7Vs3xj4Pvtup1min+WYxsq5r5lH2m3eW3f8me3y7/Rt3x5u/yzwAY9y5/TLj9yyPf15Lb8s/qWv65d/uG+5R9tly8e4bOzHLi0/fcz2u3fPdtnC/jfdvnT+pZvDfwV+GnPstvQJMSXA/fuWb6I5keJAp494D262fJ23btY9TM8FeNH+8ouGXC8twTOamPcpGf554BrgC0GbLP5gH0V8O99x/Jj4EbgL8A/96zbErga+EnPsr9ry35uwP42Bm457Hvow4cPH2vSwyG4kqRe
F7TPwwxhvBa4vndBNVbMYb8HVdVfR9zmY1X1i559X02TUAI8cw4xzEmSDYGn0vSMvaV3XVV9DDgT2DHJ3w/Y/OVVdV3P60/QnNNth9jvVjSJ2I+r6uN9q99C04v8tDa+cfkkTXL2/EFDi3tiux2wF3BCVf1v77qq+jXwAeAfk/xju/hxNAnX+6vq5z1lrwde3b4c1Kv9C5pewl5voOlFn/UzUFXnDlh2BXAUTVL8oL7V17WP/m0uHlD9b4D39pS5nuYHhwDHV9WPe9b9gaYne5ue4cbVlr1qwP6urtl7dyVpjWQCKknqlSHLHUMzFPDsJG9qr4275WwbzeCMOWzz7RmWDbzGcELuRdMjdVpVXTNg/ant8/37ll9aVct7F7RJyp+B2w6x36n6Tulf0Sbjp7dx3XOIuoZSVQUcCGwIvHGGog+k+Y5xq/Y6zZs9aIblQnPuYOZjOZOmN/L+/euA77Qx9Za/mCYxXZLk1jMdT881qj/NyvuiFitn+r1TT/FjaHpHz05yeJLdk9x2hup/0h8bKyd/OmtA+T8B6wN3aI/jb8BXgKcn+WaS/ZM8MMn6Mx2TJK3pvAZUktRr6gv3hbOUewvNNXr7Af/ZPq5O8ilg/6q6ZMT9zng94TRWibGq/pbkapreq65M7Wu661b/1FduynQ9vtfTJCKT2u9qqaoTknwbeEaSt9AMm+23Wfv88PYxnakfLYY5lrsPWD7d53SqntsAlw0q0PYMn0qT2P6QZnjyCuCGdtmeNPdLBaCqjklyPe11sMDLgeuTHA+8tFadHXnQTMHXD7Fug55lT6b523o6K5Pii5K8Azisqm4cdGyStCazB1SS1GsqWZixR7Idavv+qvpnmolYnkLzZf65NF/kRzKgp2gYW/QvaHu8NubmX/CnvqQPSurGkZxN7esO06y/Q1+5cZmv/UIz8c96wJunWT+1z0OqKjM8juorP9OxDDqOVT4DffXMdOx70iSa76+qbavqhVX12mpuU3TaoA2q6rNV9RCaIep7Ap+hmUDouCTDjh4YWlVdXlUHVNVWND3Z/06TdB8CvGTc+5OkLpiASpIASPIwmpk6LwK+Mex2VfXnqvo0zYylvwJ26bmO7Yb2eRLDBh86YNnD2ufeIY6Xts+DblvxzwOWjRrzOTQTyDx4musttx8Q0zic2Vf/TZJsRDM769VtfGNVVd+mmdX2scD/G1DkBzTXMD54yCrPbJ8HHcs/0QxJPrN/HfD/+hO/JJvTDO09t6oG9n62pnpUjx+w7iEzBVtVl1TVcVX1NODrNEO+t5ppm9VVVb+sqvcCj24X7TFTeUlaU5mASpJIsivNBCkAr5ltgpMkjx5wLdomwK1oJiea6nWcmpBoEvcsfHaSqWsISbIxKychOrqn3A/b52cmWa+n/ONoJvHpN1LM7XWfx9AMX35Z77okz6C5LcopA4Zorpa2vlOBBwy4b+Ur2ng+VVXXjnO/PV5D8z6vci1oVf2JpnfwEUn261+fZL0kvUNzv0jTW/n8JFv3lFsf+K/25ccGxHBv4Fl9y14P3IKbfwYGmXo/bpZsJnkCTWLdH/NObWLfu2wRK4cbXz3L/kaS5r6ygyajmurdXWVyIklaG3gNqCStW+7VTgIDzfVtd6TpwboHzS0m9q+qDwxRz6eBy9trAX9Hk3zuRpP0HNxzbdo5NDPrPjXJFbT3Layqw8ZwLN8Avtded/o3mh6he9DMjtvbg/tdmh65RwHfTvJdmnuFPgr4Mk3Pba+5xPwqmuHLhyXZkeY2NfegGZ55Mc21spOwH83ES59M8mTglzQJ787AuaycQXbsquonSf6X5tYs08V2L+CIJM+juUXO5TQ9hdsBt6cZLk1V/TXJC2iSxh+27+mlNO/NP9Lc53VQAnoi8IEkj6Xpfd+O5keFn7EycZ3O8TRJ6KuT3Idm4qL7AI8BPk/z3vV6O3CXJKfQ3JpmfeCRbXxHV9Uw964dxZbAD5L8hOb2LRfQ/L0+nqaX/r/H
vD9J6oQJqCStW+5J00METQ/KJTRf1j8GHFVV5w9Zz1Ka5GA7mlto/I3mC/zSqjpmqlBVXZ/kSTTJwN6snHRmHAno4TQJ5ItpZuS9gOb+l4f2FqqqSrI78E5gF5rhkt+nSVR2pS8BnUvMVfWXJP/a7n8PYCeantRPAK8fdLuPcaiqnyd5IE3P76Pbff8J+B+a+2DOZXKnUbyW5nYrG/SvqKqLk2xHc63ik1l5T9QLgO8Ax/aV/98kF9B8tp5Ck5z+BjgAePs01wmfRvM+vYnmfbyK5p6sr56tF7+qLkuyE83n6KHAjjTDfHel+SGlPwE9FHgi8ICeff0GeAHwoZn2NUfLad7XR9D8oLAZzeRKXwfeUlXfm8A+JWniMrd5HyRJkuZHkh2Ak4E3tJMGSZLWEl4DKkmSJEnqhAmoJEmSJKkTJqCSJEmSpE54DagkSZIkqRP2gEqSJEmSOuFtWMbsdre7XS1evHi+w5AkSZKkefHDH/7woqraYtA6E9AxW7x4MWecccZ8hyFJkiRJ8yLJ76Zb5xBcSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInFs13AJIkad21+IATVruO5YftNoZIJEldsAdUkiRJktQJE1BJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktQJE1BJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktQJE1BJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktSJThPQJE9K8tkkv0tyVZJzkhya5NY9ZRYnqWket+2rb+Mkb01yQVvfaUm2H7Df9ZIsTbI8ydVJzkryxGli3DfJL5Jc08b3grGfCEmSJElaB3XdA/oK4AbgNcBjgPcC+wFfS9Ify6HAdn2Py/rKfAjYF3gdsDtwAfDVJPfvK/cm4CDgPcAuwOnAsUl27S2UZF/gSOCzbXzHAkck2W9ORytJkiRJusmijvf32Kq6sOf1qUlWAEcBOwDf6Fn326o6fbqKktwPeDqwT1V9pF12KrAMeCOwR7vs9jSJ72FVdXi7+clJtgYOA77cllsEHAJ8vKoO7Cl3Z+BNST5YVdfN/dAlSZIkad3WaQ9oX/I55Qft85YjVrcHcB1wTE/91wOfAnZOslG7eGdgQ+Dovu2PBu6bZEn7ejtgiwHlPg5sDjx0xPgkSZIkST3WhEmIHt4+/7xv+aFJrk/y1yTHJblv3/r7AOdW1ZV9y5fRJJxb95S7Bvj1gHIA2/SUAzh7lnKSJEmSpDnoegjuzSTZkma47ElVdUa7+Bqa6zBPBC4E7kVzzeh3kzyoqqYS1c2ASwZUu6Jn/dTzpVVVQ5RjQJ395RaMxQecMJZ6lh+221jqkSRJkrSwzVsCmuRWwBeB64G9p5ZX1QVA78yz30ryFZqeyAOBZ05VAfQnlVPL+18PW45pys4oyfOB5wNstdVWo24uSZIkSeuEeRmCm2Rj4DjgbsDOVXX+TOWr6vfAt4EH9ixeweBeyU171k89b5qkP+EcVI4BdW7Wt35QfO+vqm2ratsttthiumKSJEmStE7rPAFNsgHNbU4eBOxaVT8ddlNu3ju5DFiSZJO+ctsA17Lyms9lwEbA3QeUA/hZTzlYeS3odOUkSZIkSXPQaQLa3uvzE8AjgD1nus1K33ZbAQ8Bvtez+DhgA+DJPeUWAU8BTqyqa9rFX6FJSJ/RV+0zgbOr6tz29WnARdOUWwF8Z5hYJUmSJEmDdX0N6P/QJIyHAFckeXDPuvOr6vwkb6NJjE+jmYTonsBS4EbgzVOFq+rMJMcA72x7Vc8F9gOW0JNEVtVfkrwDWJrkMuBHNEnqTsCePeWuS/Ja4IgkfwBOasvsA7yoqq4d76mQJEmSpHVL1wnoLu3zge2j1xuAg2iG
wu4HPBe4NU2v5DeAN1TVOX3b7E2TzB4M3BY4C3hMVf2or9yBwOXAS4A7AucAe1XV8b2Fqup9SQp4OfBK4DzgP6rqiNEPVZIkSZLUq9MEtKoWD1Hmw8CHh6zvKmD/9jFTuRtoktSDh6jzSJrbwEiSJEmSxmheZsGVJEmSJK17TEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUCRNQSZIkSVInTEAlSZIkSZ0wAZUkSZIkdcIEVJIkSZLUiUXzHYAkrc0WH3DCatex/LDdxhCJJEnSms8eUEmSJElSJ0xAJUmSJEmdMAGVJEmSJHXCBFSSJEmS1IlOE9AkT0ry2SS/S3JVknOSHJrk1n3lNk3ywSQXJbkiyUlJ7jugvo2TvDXJBW19pyXZfkC59ZIsTbI8ydVJzkryxGli3DfJL5Jc08b3gvGdAUmSJElad3XdA/oK4AbgNcBjgPcC+wFfS7IeQJIAx7XrXwQ8EdgAODnJXfrq+xCwL/A6YHfgAuCrSe7fV+5NwEHAe4BdgNOBY5Ps2lsoyb7AkcBn2/0fCxyRZL/VPG5JkiRJWud1fRuWx1bVhT2vT02yAjgK2AH4BrAH8FBgp6o6GSDJacC5wKuAF7fL7gc8Hdinqj7SLjsVWAa8sa2HJLenSXwPq6rD2/2enGRr4DDgy225RcAhwMer6sCecncG3pTkg1V13ZjPhyRJkiStMzrtAe1LPqf8oH3esn3eA/jjVPLZbvdX4Hhgz57t9gCuA47pKXc98Clg5yQbtYt3BjYEju7b79HAfZMsaV9vB2wxoNzHgc1pkmJJkiRJ0hytCZMQPbx9/nn7fB/g7AHllgFbJblVT7lzq+rKAeU2BLbuKXcN8OsB5QC26SnHgH33l5MkSZIkzcG8JqBJtqQZLntSVZ3RLt4MuGRA8RXt86ZDltus5/nSqqohyjGgzv5ykiRJkqQ5mLcEtO3J/CJwPbB37yqgP1mcWt7/etzlmKbsjJI8P8kZSc648MJBo4wlSZIkSfOSgCbZmGam27sBO1fV+T2rVzC4t3Gq5/OSIcut6HnetJ1dd7ZyDKhzs771q6iq91fVtlW17RZbbDFdMUmSJElap3WegCbZgOY2Jw8Cdq2qn/YVWcbK6zF7bQOcV1WX95RbkmSTAeWuZeU1n8uAjYC7DygH8LOecgzYd385SZIkSdIcdJqAtvf6/ATwCGDPqjp9QLHjgC2TPLxnu9sAj23X9ZbbAHhyT7lFwFOAE6vqmnbxV2gS0mf07eeZwNlVdW77+jTgomnKrQC+M+RhSpIkSZIG6Po+oP9DkzAeAlyR5ME9685vh+IeR5MMHp3klTRDbpfSXKP5lqnCVXVmkmOAd7a9qucC+wFL6Ekiq+ovSd4BLE1yGfAjmiR1J3pu61JV1yV5LXBEkj8AJ7Vl9gFeVFXXjvdUSJIkSdK6pesEdJf2+cD20esNwEFVdWOS3YHDgSOAjWkS0h2r6vd92+xNk8weDNwWOAt4TFX9qK/cgcDlwEuAOwLnAHtV1fG9harqfUkKeDnwSuA84D+q6oi5Ha4kSZIkaUqnCWhVLR6y3Aqansd9Zil3FbB/+5ip3A00SerBQ+z7SODIYeKUJEmSJA1vXu8DKkmSJElad5iASpIkSZI6YQIqSZIkSeqE
CagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkToyUgCY5JMldJxWMJEmSJGnhGrUH9MXAb5J8OckeSexBlSRJkiQNZdQE8o7AC4E7AF8Afpfk9Um2HHdgkiRJkqSFZaQEtKquqKojq+oBwL8CJwKvBM5N8vkkj5lEkJIkSZKktd+ch9BW1Q+q6t+AJcB3gT2BE5L8NskLHZ4rSZIkSeo15yQxyd2TvAVYBvw/4PPAM4DTgHcC7xtHgJIkSZKkhWHRKIWTrA88Hvj/gB2BPwPvBY6sqj+2xT6V5FvAfwHPH2OskiRJkqS12EgJKPAHYAvgm8DTgM9X1fUDyv0YuPVqxiZJkiRJWkBGTUCPBY6oqp/PVKiqvsdqDO+VJEmSJC08IyWgVfWiSQUiSZIkSVrYRuqlTPLqJO+eZt27krxyPGFJkiRJkhaaUYfJ7g38ZJp1Z7brJUmSJElaxagJ6FbAr6ZZ91vgrqsXjiRJkiRpoRo1Ab0S2HKadXcBrlm9cCRJkiRJC9WoCei3gFcm2ah3Yfv65e16SZIkSZJWMeptWA4Cvgv8MsnRNPcF3RJ4JrA58NxxBidJkiRJWjhGvQ3LWUl2BA4HXk3Tg3oj8G3giVV11vhDlCRJkiQtBKP2gFJV3we2T3ILYFPgkqq6auyRSZIkSZIWlJET0Clt0mniKUmSJEkaysgJaJK7AXvR3JJl477VVVX/No7AJEmSJK09Fh9wwmrXsfyw3cYQidZkIyWgSfYEjqW59vMvrHrblRpTXJIkSZKkBWbU27AcDJwC3Kmq7lxVS/oed5utgiR3SfLuJKcluTJJJVncV2Zxu3zQ47Z9ZTdO8tYkFyS5qq13+wH7XS/J0iTLk1yd5KwkT5wmxn2T/CLJNUnOSfKCEc6RJEmSJGmAURPQuwGHV9WFq7HPrWmG8F7C7PcNPRTYru9xWV+ZDwH7Aq8DdgcuAL6a5P595d5EcxuZ9wC7AKcDxybZtbdQkn2BI4HPAo+h6fE9Isl+wx6gJEmSJGlVo14D+gua+32ujm9W1R0AkjwPePQMZX9bVadPtzLJ/YCnA/tU1UfaZacCy4A3Anu0y24PvAI4rKoObzc/OcnWwGHAl9tyi4BDgI9X1YE95e4MvCnJB6vqurkctCRJkiSt60btAX0V8Jp2IqI5qaob57rtAHsA1wHH9NR/PfApYOckG7WLdwY2BI7u2/5o4L5JlrSvtwO2GFDu4zSJ90PHGLskSZIkrVNGTUAPoknEfp7k7CTf7HucOub4Dk1yfZK/JjkuyX371t8HOLeqruxbvowm4dy6p9w1wK8HlAPYpqccwNmzlJMkSZIkjWjUIbg3AOdMIpA+19Bch3kicCFwL+A1wHeTPKiqft6W24zmWtJ+K3rWTz1fWlX9s/QOKseAOvvL3UyS5wPPB9hqq62mOSRJkiRJWreNlIBW1Q4TiqN/PxcAvTPPfivJV2h6Ig8EntkuD4Nv/ZIBr4ctxzRlZ4r3/cD7AbbddltvRSNJkiRJA4w6BHfeVNXvgW8DD+xZvILBvZKb9qyfet40SX/COagcA+rcrG+9JEmSJGlEIyegSbZM8vYkZyQ5N8k/tstfmuRfxx/izXfPzXsnlwFLkmzSV24b4FpWXvO5DNgIuPuAcgA/6ykHK68Fna6cJEmSJGlEIyWgSe4D/BR4FvBHYCuayX4A7gq8ZKzR3XzfWwEPAb7Xs/g4YAPgyT3lFgFPAU6sqmvaxV+hSUif0VftM4Gzq+rc9vVpwEXTlFsBfGf1j0SSJEmS1k2jTkL0NuDnNLc1uZom
qZvyXeC/hqkkyZPafz6gfd4lyYXAhVV1apK30STHp9FMQnRPYClwI/DmqXqq6swkxwDvTLIBcC6wH7CEniSyqv6S5B3A0iSXAT+iSVJ3AvbsKXddktcCRyT5A3BSW2Yf4EVV1Xu8kiRJkqQRjJqAPhR4WlVdnmT9vnV/Bu44ZD3H9r0+on0+FdiBZijsfsBzgVvT9Ep+A3hDVfXPwrs3cAhwMHBb4CzgMVX1o75yBwKX0/TS3pFmNt+9qur43kJV9b4kBbwceCVwHvAfVXUEkiRJkqQ5GzUBvXGGdbcDrhqmkqrqnwyof/2HgQ8PWddVwP7tY6ZyN9AkqQcPUeeRNLeBkSRJkiSNyaiTEH2fpsdxkL3wGklJkiRJ0jRG7QF9E3BSkhOBT9LMSPvIJC8BHg9sP+b4JEmSJEkLxEg9oFV1KvA4mkl+PkxzW5TDgIcBj6uq702/tSRJkiRpXTZqDyhVdQJwQpKtgdsDFw+YGEiSJEmSpJsZOQGdUlW/Bn49xlgkSZIkSQvYSAlokmfPVqaqPjb3cCRJkiRJC9WoPaAfnWZ59fzbBFSSJEmStIpRE9AlA5ZtDuwOPB145mpHJEmSJElakEZKQKvqdwMW/w74UZIA+9MkopIkSZIk3cxIt2GZxbeA3cZYnyRJkiRpARlnAvpg4PIx1idJkiRJWkBGnQX3dQMWbwj8I03v53vGEZQkSZIkaeEZdRKigwYsu4bmOtBDgENXNyBJkiRJ0sI06iRE4xyyK0mSJElah5hQSpIkSZI6Meo1oFuNUr6qzhstHEmSJEnSQjXqNaDLgRqh/Poj1i9JkiRJWqBGTUD3Aw4E/gZ8GvgzcEdgL+BWNBMRXTPOACVJkiRJC8OoCei9gR8Bj6+qm3pCk7wR+AJw76p62fjCkyRJkiQtFKNOQvQ04Mje5BOgff0+4OnjCkySJEmStLCMmoDeCthimnW3B265euFIkiRJkhaqURPQU4A3J3lg78IkD6K5/vOU8YQlSZIkSVpoRk1A/4NmkqHTkyxP8r0ky4HTgKvb9ZIkSZIkrWKkSYiq6twk9wKeCzwYuBNwNk0CelRVXTf2CCVJkiRJC8Kos+DSJpkfaB+SJEmSJA1l5AQUIMk/AdsDm9PMivunJFsDf66qy8YZoCRJkiRpYRgpAU2yEXA08AQgQAHHA38C3gL8EjhgzDFKkiRJkhaAUSchOgR4JPAs4A40SeiU/wN2HlNckiRJkqQFZtQhuE8D/rOqPplk/b515wKLxxKVJEmSJGnBGbUHdHPg5zPUtdHqhSNJkiRJWqhGTUDPBbabZt2DgHNWLxxJkiRJ0kI1agL6MeCAJM8ANmyXVZIdgZcBHx5ncJIkSZKkhWPUBPQtwAnAx4EV7bJvAycBX6mqd48xNkmSJEnSAjLSJERVdQPw1CT/QzPj7e2Bi2mSz1MnEJ8kSZIkaYEYOgFNsiFwOnBAVZ0IfGtiUUmSJEmSFpyhh+BW1bXAEuD6yYUjSZIkSVqoRr0G9GvAoycRiCRJkiRpYRvpGlDg3cDRSRYBXwAuAKq3QFX9djyhSZIkSZIWklET0KmJhvanue3KIOvPPRxJkiRJ0kI1awKaZCfg+1V1ObAPfT2ekiRJkiQNY5ge0K8B29EkoR9Nsh5wCvBvVfWrSQYnSZIkSVo4hpmEKANePxS49fjDkSRJkiQtVKPOgitJkiRJ0pyMOgmRNBGLDzhhLPUsP2y3sdQjSZIkafyGTUC3THK39t/r9yy7tL+gt2GRJEmSJA0y7BDczwC/ah+/aJd9oWdZ72NGSe6S5N1JTktyZZJKsnhAuU2TfDDJRUmuSHJSkvsOKLdxkrcmuSDJVW292w8ot16SpUmWJ7k6yVlJnjhNjPsm+UWSa5Kck+QFsx2XJEmSJGlmw/SA7j3mfW4N7AX8EPgW8Oj+AkkCHAcsAV4EXAIsBU5Ocv+qOr+n+IeA3YBXAr8FXgh8Ncl2VXVmT7k3Aa8ADmz3/VTg2CS7V9WX
e/a9L3AkcChwEvAI4Igkqar3rv7hS5IkSdK6adYEtKqOGvM+v1lVdwBI8jwGJKDAHjQz7e5UVSe3ZU8DzgVeBby4XXY/4OnAPlX1kXbZqcAy4I1tPSS5PU3yeVhVHd7u4+QkWwOHAV9uyy0CDgE+XlUH9pS7M/CmJB+squvGdiYkSZIkaR3S+Sy4VXXjEMX2AP44lXy22/0VOB7Ys6/cdcAxPeWuBz4F7Jxko3bxzsCGwNF9+zkauG+SJe3r7YAtBpT7OLA5TVIsSZIkSZqDNfU2LPcBzh6wfBmwVZJb9ZQ7t6quHFBuQ5rhvlPlrgF+PaAcwDY95Riw7/5ykiRJkqQRrakJ6GY01332W9E+bzpkuc16ni+tqhqiHAPq7C93M0men+SMJGdceOGFg4pIkiRJ0jpvTU1AA/Qni1PLJ12OacpOq6reX1XbVtW2W2yxxSibSpIkSdI6Y01NQFcwuLdxqufzkiHLreh53rSdXXe2cgyoc7O+9ZIkSZKkEa2pCegyVl6P2Wsb4Lyquryn3JIkmwwody0rr/lcBmwE3H1AOYCf9ZRjwL77y0mSJEmSRjTMfUDnw3HA3kkeXlWnAiS5DfBY4JN95d4APBk4qi23CHgKcGJVXdOW+wpNQvqMtvyUZwJnV9W57evTgIvacif1lVsBfGdcByhJktYsiw84YbXrWH7YbmOIRJIWrnlJQJM8qf3nA9rnXZJcCFzYJpzH0SSDRyd5Jc2Q26U012i+ZaqeqjozyTHAO5NsQHOf0P2AJTRJ5FS5vyR5B7A0yWXAj2iS1J3oua1LVV2X5LXAEUn+QJOE7gTsA7yoqq4d86mQJElahcmwpIVqvnpAj+17fUT7fCqwQ1XdmGR34PB23cY0CemOVfX7vm33Bg4BDgZuC5wFPKaqftRX7kDgcuAlwB2Bc4C9qur43kJV9b4kBbwceCVwHvAfVXUEkiRJkqQ5m5cEtKr6JwMaVGYFTc/jPrOUuwrYv33MVO4GmiT14CH2fSRw5GzlJEmSJEnDW1MnIZIkSZIkLTAmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqROL5jsAaU2z+IATxlLP8sN2G0s9kiRJ0kJhD6gkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRPOgitJWrDWpFmt16RYJEmaL/aASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRPeB1SSNFbe71KSJE3HHlBJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktQJE1BJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktQJE1BJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktQJE1BJkiRJUidMQCVJkiRJnTABlSRJkiR1wgRUkiRJktSJRfMdgCRJ6tbiA05Y7TqWH7bbGCKRJK1r1tge0CQ7JKkBj0v7ym2a5INJLkpyRZKTktx3QH0bJ3lrkguSXJXktCTbDyi3XpKlSZYnuTrJWUmeOMFDlSRJkqR1whqbgPZ4MbBdz+ORUyuSBDgOeAzwIuCJwAbAyUnu0lfPh4B9gdcBuwMXAF9Ncv++cm8CDgLeA+wCnA4cm2TXcR6UJEmSJK1r1oYhuD+vqtOnWbcH8FBgp6o6GSDJacC5wKtokleS3A94OrBPVX2kXXYqsAx4Y1sPSW4PvAI4rKoOb/dxcpKtgcOAL4//8CRJkiRp3bA2JKAz2QP441TyCVBVf01yPLAnbQLalrsOOKan3PVJPgUckGSjqroG2BnYEDi6bz9HAx9OsqSqzp3c4UjS3Hldn6SFbBxtHNjOSfNtbRiC+4kkNyS5OMknk2zVs+4+wNkD
tlkGbJXkVj3lzq2qKweU2xDYuqfcNcCvB5QD2GauByFJkiRJ67o1uQf0r8DbgFOBvwH/DLwGOC3JP1fVX4DNgOUDtl3RPm8KXN6Wu2SGcpv1PF9aVTVLOUmSpHWCoyskjdMam4BW1Y+BH/csOjXJN4Hv0wyt/U8gQH+ySLu8//U4y918ZfJ84PkAW2211UxFJUmSJGmdtTYMwb1JVf0I+CXwwHbRCgb3Sm7aPl8yZLkVPc+btrPrzlSuP673V9W2VbXtFltsMfNBSJIkSdI6aq1KQFu9vZTLaK7b7LcNcF5VXd5TbkmSTQaUu5aV13wuAzYC7j6gHMDPViNuSZIkSVqnrVUJaJJtgXsA32sXHQdsmeThPWVuAzy2XUdPuQ2AJ/eUWwQ8BTixnQEX4Cs0Cekz+nb9TOBsZ8CVJEmSpLlbY68BTfIJmvt5/gi4lGYSoqXAH4B3t8WOA04Djk7ySpoht0tpeknfMlVXVZ2Z5BjgnUk2aOvdD1hCT7JZVX9J8g5gaZLL2n0/BdiJ5rYukiRJkqQ5WmMTUJrbqzwNeBGwCfAn4HPA66vqIoCqujHJ7sDhwBHAxjQJ6Y5V9fu++vYGDgEOBm4LnAU8pr2utNeBNDPnvgS4I3AOsFdVHT/uA5QkSZKkdckam4BW1aHAoUOUWwHs0z5mKncVsH/7mKncDTRJ6sFDBytJkiRJmtVadQ2oJEmSJGntZQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSerEovkOQNLaYfEBJ6x2HcsP220MkUiSJGltZQ+oJEmSJKkTJqCSJEmSpE6YgEqSJEmSOmECKkmSJEnqhAmoJEmSJKkTJqCSJEmSpE6YgEqSJEmSOmECKkmSJEnqxKL5DkCSJEmSFqrFB5yw2nUsP2y3MUSyZrAHVJIkSZLUCRNQSZIkSVInHIIrrcEcsiFJkqSFxB5QSZIkSVIn7AGdB/ZqSZIkSVoXmYBKWuv4I44kSdLaySG4kiRJkqROmIBKkiRJkjphAipJkiRJ6oQJqCRJkiSpEyagkiRJkqROmIBKkiRJkjphAipJkiRJ6oQJqCRJkiSpEyagkiRJkqROmIBKkiRJkjphAipJkiRJ6oQJqCRJkiSpEyagkiRJkqROmIBKkiRJkjphAipJkiRJ6oQJ6ABJ/j7JZ5L8NcnfknwuyVbzHZckSZIkrc1MQPsk2QT4BnAv4DnAs4B/AE5Ocsv5jE2SJEmS1maL5juANdC+wN2Ae1bVrwGS/AT4FfD/AW+fx9gkSZIkaa1lArqqPYDTp5JPgKo6N8l3gD0xAZUkSdIaYvEBJ4ylnuWH7TaWetYU4zgvC+2crClMQFd1H+CLA5YvA57ccSySJEmSNBZrQmLuNaCr2gy4ZMDyFcCmHcciSZIkSQtGqmq+Y1ijJLkWeFtVLe1bfgjw6qpapdc4yfOB57cv7wmcs5ph3A64aDXrGBdjGcxYBjOWVa0pcYCxTMdYBjOWwdaUWNaUOMBYpmMsgxnLYGtKLOOK465VtcWgFQ7BXdUlNL2g/TZlcM8oVfV+4P3jCiDJGVW17bjqWx3GMpixDGYsa24cYCzTMZbBjGWwNSWWNSUOMJbpGMtgxjLYmhJLF3E4BHdVy2iuA+23DfCzjmORJEmSpAXDBHRVxwEPTnK3qQVJFgMPaddJkiRJkubABHRVHwCWA19MsmeSPWhmxf09cGRHMYxtOO8YGMtgxjKYsaxqTYkDjGU6xjKYsQy2psSypsQBxjIdYxnMWAZbU2KZeBxOQjRAkq2AdwCPAgJ8HXhpVS2fz7gkSZIkaW1mAipJkiRJ6kZV+ZjwA9gO+DTwR+Ba4GLga8BzgPXbMpsCH6SZ9vgK4CTgvgPqejNwYltHAc+dr1j66l3axvPteTov
Nc3j/vNxXoB7A8e2Za+iuTXPS7qMBThohvNy9Ty8R1sBRwHnAVcCvwQOBm45D7EsAT4DXNqWOxnYdlyfXeDWwOHAKcDf2nO+wzR1bQy8Fbig/aycBmw/D3FMtG0ZNhZgW5rhP79oPyfnAZ8AlsxDLHeluQTjd+17c1G7zS7z8Vnpq3fsbe6In5eJtrmjnhcm2OaO8Hk5aIbzMpY2d8T3aM5t7pjjmHR7+wjgaOA37Xv/G+C9wO0H1DXn9nYCsUy6zR0qFrppc4eNpYs2d+j3qK/eSbS5o3xeJt3mjnReWI02t6pMQCf9AF4K3EjzRfhZwPbAnsD/tH/oe9IM8/0WcD7wNOAxwKntm3qXvvoua8sexYgN1rhj6an3bsDlwJ+H/cOcwHkp4CPAg/sem8xDLNvS/Id8HPA4YEea+8Tu32UswF0GnI9HANcBn+44llvSfPk5l6ax2xF4FU2jdUzHsWwO/IHmP9unAI+l+UJ0GXDvMcWyGFjRlvksM385+wTNF7N92/fnc+15uX/HcUy6bRkqFpovtd8B/h14OPB04Oc0/1n+fcex3Af4UFvPju22X2rLP6Hrz0oHbe4on5dJt7mjxDLpNnfYz0sXbe6wscy5zR1zHF20t8cC/wfsTdNmPK/d52+BW42jvZ1QLJNuc4eKhW7a3GFj6aLNHfo96qDNHeXzMuk2d5RY5tzm3lTHsAV9jP5o3+AbgXdNs/7uwD+1b3wBO/as+zuaxv1dfdus1z5vzQgN1iRi6Vn/VZoJmk4Z5g9zQuelgIPn+z2imdhrGfD5+Y5lmjqe1W67W8fn5dFtuUf31XMYcD0zNKATiOU/231u3bPsljT/scz4JXGEWNKz7JFM/+Xsfu26vXuWLaL5JfG4ruKY+uy2z5NqW4Y9J1sMWHbXdh9v7DKWaepYRDMp3fHzFQuTa3NH+bxMus0d9vPSRZu7Op+Xcbe5w56XObW5E4iji/Z2UJuxfRvTPj3L5tTeTiKWqc9u+zypNnfY89JFmzv0eRlQbtxt7sixMLk2d5TPy6Tb3GE/L3Nuc29W9+ps7GPWN/3LNL0vG89S7kPAHwYsPwr43TTbjNpgTSQWml/KLgQ2G+EPc+yxrMYf5lhjAXZqY3nYfMcyzbYnAX8CFnV8XnZvz8uD+8od0DaM0w4Jm0AsXwJ+PqDcZ2h+CZz23AwbS982M305ey3NUJhN+pa/AbgG2KiLOPrKTaRtmUssfdv8GfjQGhLL2cDn5iMWJtjmjhILE25zh42FDtrc1fm8MOY2d4TzMqc2dwJxdNre9mx7izam1/Ysm1N7O4lY+tZPvM0dNpa+shNpc+cYy0Ta3GFioaM2d7ZY6LDNnSkWVqPN7X14G5YJSbI+sANwYlVdPUvx+9D8cfVbBmyV5FZrYixJNqWZLfhVVbViPmNp7ZfkmiRXJvlGkofNQywPbZ83TnJ6kuuS/CXJu5LcouNY+vdxF5phEp+oqus7juUk4FfAfyXZJsmtkuwEvAR4X1Vd0WEsN9B8Cel3DU1je/cxxDKs+wDnVtWVfcuXARvSfDHpIo456SqWJPcGbk8zLKzzWJKsl2RRkjsmeS1wD5qhS53G0kGbO6pJtrnD6qLNnZMJtbnDGrnNnVAc89XePrx97m0zRm5vJxjLnHQVS0dt7rSxzEObOzCWeWpzZ3qPum5zB8Uypza3nwno5NyOpnH93RBlNwMuGbB86sO+6Roay1tprjH56BoQy9E01y88kmYc+ubAN5Ls0HEsd26fj6GZXOBRwFtoxtJ/suNY+j2L5m/+qFnqH3ssbcP3UFYO3biM5vZGXwL+o8tYaIZb/UOSzacKJFkPeFBPPasby7Bmi3lQLJOIY64mHkuSRcD7aH6B/tA8xfIWmuv4LqC5ju6pVfX1eYhl0m3uKCbd5g6rizZ3ribR5g5ljm3uJM5J5+1tklsD76T50vyFnlVzaW8nFctcTTyWLtrcIWLprM2dJZZO29xZYum0zZ0hlrm2uTez
aNSANBGh6c4etLxrQ8XS/urybOBfqu2Tn69YAKrqWT0vv5XkizS9YQez8teaLmKZ+lHn6Kp6XfvvU9pfoQ5Lsk1V/ayjWPo9G/hxVf1kNfc/cixJNqZprG5P86XsPJovIK+juT5ov65iofmP9cXAx5K8mGYY2IE0MzVCMzytK2vS3/6a6j3A/6O5hm7Ql8cuvBP4FHBHmr+jTyZ5UlV9qasAOmpzh9ZBmzusLtrcuZpEmzuUjtrcYXTa3rbJ0/8CWwIP6et57rS9nSWWTo0Yy0Tb3CFjeScdtLkzxdJ1mzvbeemyzZ0llrG0ufaATs7FNDOr3XWIsisY/MvbVI/N6jYAk4jlSJpfxs5Pctskt6X5QWP99vVGHcayiqq6DDgBeOAM9U8ilovb56/1lTuxfb5/h7HcJMmDgHsx+y/xk4rl32iGgexaVUdX1Ter6nDg5cALktyvq1iq6rfAM4AHAL+mmZJ8O5phNtD84rq6sQxrtpgHDfmZRBxzNdFYkhxK80vvPlV14izFJxZLVZ1fVWdU1Zeqai/gdJqZI7uMpYs2d84m0OYOq4s2d2QTbHOHNZc2d+xxdNnetj2rR9H0ED1uQOI/l/Z2UrHM1URjmXSbO2wsXbS5Q8TSWZs7l8/LpNrcIWKZa5t7MyagE9L+WnAK8KgZPqRTltFcm9BvG+C8qrp8DYzl3sALaL7UTz0eQjMl9CVM8+tqx+dlul87JxnLsqnqB8QC0/za28F5eQ7Nr96zDo+YUCz3BS6pqt/0lft++3zvDmOhqj5L88veNjSzMz4AuBXw+6o6bwyxDGsZsCTJJgNivpbmC1sXcczJJGNJciDNhCkvqaqPz2csA5zBNNeLTTCWLtrc1TXONndYXbS5czGpNndYI7e5kzonHba376O51ct0wzVHbm8nGMucTDKWjtrcuZ6XSbS5s8XSZZs71/MyiTZ3mL8jBux3xjZ3UHA+JvRg9qmPl9BMffy49o18eM+629D8yvDuabYd97TdI8VC88tq/+NM4KftvwfeM3TS56Wv3HnAqR2fl82Bq4H39NUzdQPjrbuKpWfdhu26L87XZ5eVN2jfuq+e5zPLbGodfV7uTPPr99JxnJe+ZTPNEHn/dt1zepYtornmYtop58cdR1+5ibQto8RCM2SvgNcM+5md9HnpKb8eza/x53T8WdlhwONMxtjmruZ5GWubO8J5mXibO+p5YYJt7gjn5aBBx88sbW5Hn5Wxt7fA29qyz5qhvvszh/Z2ErH0bTuxNnfYWOigzR31vPTUMfY2d8jPyw4DHmcy5jZ3Nc7L2NvcIc/LnNvcm5Uf5WB9jP5g5c1fv0YzDOVhwB7AfwNX0Ny7cD3guzT3OXoqsDPNLxYr6LsJMM2MVE+imUSgaMbqPwl4UtexDKj/FEa/Qe9qxwK8AvgAzVTZO9D88vxTml80Z50megLv0etpfvl+M81/zAfQDIH46Hy8R8AT2s/KrDdxnuB7tJjmpsW/ZOVN0V/ZLjuD9n5oHcWyAc3wr8fRTCf+IpphYd8CNhzHeWnL7ULzt/mG9vy/vn29S199n6L5NfV5NDdG/wxN4/4vHccx0bZl2Fja9+5Gmhti999we5uOYzkIeBfNr8EPb59PbOt/ateflUm3uSOcl4m3uSN+difa5o76HjHBNneE92gxc2xzxxzHxNtb4NXt/j/Eqm3G3fvqm1N7O6FYJv19bqhY6KDNHSGWg5hwmzvKezTpNneE89LF99xRPrtzbnNvqmOUxtHH3B40F3MfS3Otw3U0X4hPBJ7JyhsRbwZ8uF13Jc1sdfeb5sNfgx5dxzJNbEP9YY4zFuCxwHdo7nN0Hc0vz8cBD5qn9yjA/jTDea6lmX3sjcAG8/EeAV9sz8ms/9lP+LxsA3yaJkG8iuaL0eHAph1/XhbRzAT5Z5pbAfyG5iL+gTdmX41YljP4b3V5X123AN5Oc6/Aq4HvMXxPwjjjOGWacuNsW2aNhWa2wYFxAKd0HMsewDeA
v7Sfld/RtC0PmY/PyoC6T2H8be4w56WrNneo80I3be7Q7xGTb3OHPS9zbnPH+FmZeHvLDG0XfV+IWY32dgKxTFu2y1jooM0dIZaJt7mjvEcD6j6FMba5I5yXibe5o5wXVrPNrSrSViRJkiRJ0kQ5CZEkSZIkqRMmoJIkSZKkTpiASpIkSZI6YQIqSZIkSeqECagkSZIkqRMmoJIkSZKkTpiASpJI8twklWTrvuVJ8owkX09ycZLrkpyf5FNJduwpd1C7/dTjsiS/SvLJJDuPEMddkrw7yWlJrmzrWjxN2U2TfDDJRUmuSHJSkvsOuZ/eWK9LcmGSbyV5bZLbDyh/SpJThj2OtVGSWyZZmuRH7ft3dZJzkryn93ORZHmSj85TjJXkoL5l+7SftWuTXNoum1iMSV6a5AkDlh+UxHvbSdIsFs13AJKkNVOS9YFPAY8HjgLeTXPz6r8Hngx8PcmmVfXXns0eCtwAbAIsAZ4EfCXJ0cBzqurGWXa7NbAX8EPgW8Cjp4ktNDfiXgK8CLgEWAqcnOT+VXX+EIf4UeBImh9jNwce3Nb14iR7VtV3e8r++xD1rbWS3Ak4Cbgz8B7g2zQ3GN8G2Ad4CPDP8xbgStsBN723Se4MvB/4BLA3cHW76vHA3yYUw0tpzs/n+pZ/EPjKhPYpSQuGCagkaTpLaRLIJ1XVZ/vWfSLJo4Hr+pZ/r6qu73n9oSQvA94OnAm8bZZ9frOq7gCQ5HlMk4ACe9AkuztV1clt+dOAc4FXAS+eZT8Af6iq03teH5/kXTSJ7+eS3L2qrgCoqp8NUd/a7OPAnYAHVdWvepafnOQIYM/5Cevm+t4vgH8A1geOqqpv95T7caeBNfs8n57kWJI0mENwJUmrSLIh8HLghAHJJwBVdWJVXTlbXVX1DuDHND1Hs5WdrYd0yh7AH6eSz3bbvwLHsxrJUlX9GXglcAfgqVPL+4fgJrlVO1T4vCTXJPlzOwT4Xj1lFrVDWn/Rlvljkrcl2bh3n0ne0A57/Ws7nPgbSR7cV2Zs++uX5EHAI4A39yWfU+ekquoLM2y/RZIjk/yyHTb9+3bo9ZZ95e6R5PNJ/tIO7z0vybFJFo1wjDcNwW2H2E69J19v1320XbfKENwkS5J8PMmf2vp/m+S/e9Y/MMln0gwxv6odfvzmJLfoKbMcuCvwjKwcwj21z1WG4Ca5TTuE+Y/tPs9J8rK2B3+qzA5tPXu0ZS9KMyT86CS3ne68S9Layh5QSdIg2wK3pRnmOg7/B7wmyVZVdd4Y6rsPcPaA5cuAZye5VVVdPse6TwSupxl2+qFpyryDJgl+DfArmiG8D6E5Z1OOBh4L/BfwXeDewJuAxcATe8pt2dZ3PnBL4JnAN5NsW1U/mcD++j2yfZ7re70ZzdDXpcCFNMN4Xw58J8m9qmpqWOyXgEuB/YCLaI57V1b+GD7MMfZ6E81Q7XcBLwR+1O5/FUmWAN8HrgRe39b/99y8h30rml76jwKX0XzGXgfcjZU/Rjwe+DJwFnBQu2y6fa4HnAD8S1vPT4HdaEYDbNEeZ6//pjlHTwfuCbyFZjj7c6Y5fklaK5mASpIG+fv2+Xdjqm8q6bxTz79Xx2bA8gHLV7TPmwJzSkCr6qokF9HEOp3tgE9UVW+C+vmpfyR5GPAUmuteP9YuPinJCuDo9jrVM9v9Pa9nu/VpriNcBvwb8JJx72+A1Xqvq+qcnjinjuE7NO/zLsDnk9yOZrjsnlXVm+h+suffMx7jgP3+JsnP25c/GzA8t9cbgFsA96uqP/YsP6qnvpt6+tseyu/QXEf6sSQvrKqLq+rHSa4BLpplf9Ak1w8F9q6qj7bLTkxyS+DlSd5eVRf1lP9mVb2op9w9gecleW5VObmRpAXDIbiSpC5MDTksuGm4aO8jM2w7XX2DvpSPWs+o9U/5AfDcJK9Jsm2bdPV6DM0kPp/tPU6a3lWA7W/aUfLIJCcnuZim5/U64B40vWBj398kJNkvyVlJLm+P
YepHhqljuBj4LXBYkn2T/MOAamY7xtXxaOBLfcln/zHcJsl/JfkNcA3N+/Bxms/CoHhnsz1wI/C/fcuPBjakSbh7ndD3+qfARjTDwSVpwTABlSQN8vv2+a5jqm+ql+2C9vm6vsfDR6xvBU0vaL9N2+dLRg1wSnvN3+1YGesgL6KZQXcfmsTpL0nekWSTdv3taZKMy7n5cf6lXb95u69/oRnSeTlNj+eDgQfSDPHsvXZzLPubxmq910leBBxBM4vuE4AHtcfB1DG0PXiPAs4ADgV+2V6Dud8Ix7g6Nmf2CYI+AryAZkjvo2jehxf2HseINgNWVNU1fcv/1LO+14q+11PbzWXfkrTGcgiuJGmQM2iu13sszW0uVteuwHlVNZXsPLBv/Tkj1reMwTPkbtPuZ67XfwLsTDOz6renK9DWvxRYmuSuNLMFH0bTC/lqmh6/q4GHTVPFVE/cE2l6DJ9QVTfNKJxkU5rzP+79DXIScAjNez3bLMWDPBX4elW9vCf+Jf2Fquq3NNfnBrgf8B/AEUmWV9X/DXGMq2PqmtOB0kzUtCdwUFX1Tkw01H1lp7EC2CzJhlV1bc/yO7bPF69G3ZK01rIHVJK0ivYL89uA3ZMMnMAmyaOG6Z1KcxuW+9NMvjJV/xl9j8tGDPE4YMskN/WcJrkNTRI154mTktyeZvKXC2jugTqrqvpdVb2NZsjkP7aLv0LTc/V3A471jJ6hoJvQTDRz03DfJDvRTIgzif0Nqu/7wNdpJonaelCZJDPNLLwJq96OZ+8Z9lft9aj7t4v+cUCZQce4Ok6k+SxPd13vRjQ/OvQfx3MHlL2G5nrS2ZxK8z3ryX3Ln0GTVM92DakkLUj2gEqSpnMoTU/VMe2tJo6n6dW5C03P3RNYOeR1yr8muYEmGbobTS/WLjSTvbxrmJ0meVL7zwe0z7skuRC4sKpObZcdB5xGM8HOK2mG3C6luV7vLUMe35ZpbneyHs1wyAcD+7Z1PLaqrpohxtPaGH5KM+z14TTn6iiAqjolyf8Cn0nydpoZWG+kmZF2V+DVVfVLmsTxpcBHk3yE5trP1wJ/mND+pvMsmp7QHyR5N03v77XAvWiGxG4AfHGabb8CvDrJa9r97kTzvvfG/080s7weA/yaJtl7Lk3v7zeGOcbV9HqaGWi/m+TNbQxbAo+pqmdW1V+TnE4zOdAFND2m+zC41/RnwMOS7E4znPaiqlo+oNz/0ZzH9yXZgqbXflfgecChfRMQSdI6wwRUkjRQVd2QZC+aHpt/o7k9xa2APwPfAh7e3nuz19Sw1StpehG/T/Ml/6sj7PrYvtdHtM+nAju0sd3YJgCHt+s3pklId+wZ5jub57IyCfor8Avg3cCRVTXw1ho9vgnsBRxA83/pb4GXVVVvkv1Mmusa9wEOpOk5Ww58leYcUlVfTfJimt7AJ9LcWubZwH9OYn/TqaoLkvwr8GKaHruXt/tZTpNg/vf0W/NGmlulvIzmfTiVZhjzb3vK/IlmYqL9aX7AuJom0dy9qn44wjHOSVUtb4/vYJofVm5Nk+T3JtVPA94L/A9wFfBpmtl9v9RX3VLgA+36W9AkyM8dsM8bk+wGvJlmCPHmNOdzf+Cdq3tMkrS2ijN7S5IkSZK64DWgkiRJkqROmIBKkiRJkjphAipJkiRJ6oQJqCRJkiSpEyagkiRJkqROmIBKkiRJkjphAipJkiRJ6oQJqCRJkiSpEyagkiRJkqRO/P9EINccvp0DaAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1080x432 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import re\n",
    "from collections import Counter\n",
    "\n",
    "\n",
    "def extract_disease_classes(disease_class):\n",
    "    \"\"\"Return every 'C' + two-digit code found in ``disease_class``.\n",
    "\n",
    "    Non-string input (for example a missing value) yields an empty list.\n",
    "    \"\"\"\n",
    "    if not isinstance(disease_class, str):\n",
    "        return []\n",
    "    return re.findall(r'C\\d{2}', disease_class)\n",
    "\n",
    "\n",
    "# One list of codes per row of merged_df.\n",
    "disease_classes = merged_df['diseaseClass'].apply(extract_disease_classes)\n",
    "\n",
    "# Tally how often each code occurs across all rows.\n",
    "counts = Counter()\n",
    "for row_codes in disease_classes:\n",
    "    counts.update(row_codes)\n",
    "\n",
    "# Series indexed by code, sorted by code for the x axis.\n",
    "counts_series = pd.Series(counts).sort_index()\n",
    "\n",
    "# NOTE(review): the title and axis label present these as ICD-10 neoplasm codes;\n",
    "# if 'diseaseClass' comes from DisGeNET it may hold MeSH disease classes instead -- confirm.\n",
    "plt.figure(figsize=(15, 6))\n",
    "counts_series.plot.bar()\n",
    "plt.title('Distribution of Neoplasms')\n",
    "plt.xlabel('ICD-10 Disease Classification')\n",
    "plt.ylabel('Frequency')\n",
    "plt.xticks(rotation=360)  # a full turn, i.e. horizontal tick labels\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81d32c05-d421-457e-9b19-3ef26e9a865f",
   "metadata": {},
   "source": [
    "## Certain infectious and parasitic diseases (A00-B99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "bfc1849d-91a8-4104-ba3a-e45946ff5459",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate codes for this section: a letter followed by a zero-padded\n",
    "# two-digit number, A00..A99 then B00..B99 (200 codes in total).\n",
    "# No codes are trimmed from the end: the section covers the full A00-B99 range.\n",
    "letters_numbers = [f\"{letter}{i:02d}\" for letter in ('A', 'B') for i in range(100)]\n",
    "\n",
    "# Keep the rows whose 'diseaseClass' is exactly one of those codes.\n",
    "# isin() is False for missing values and for anything that is not one of the\n",
    "# listed strings, so no explicit type check is needed.\n",
    "# NOTE(review): this is an exact match on the whole cell value, so a row that\n",
    "# lists several classes in one string is not selected -- confirm that is intended.\n",
    "one = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]\n",
    "\n",
    "# Optional export (left disabled):\n",
    "# one.to_csv('A00_B99_diseases.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "b21353f5-2271-4471-b564-b7d1d4a356bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in `one`, the A/B-code selection built in the previous cell.\n",
    "len(one)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e12d1b9-f729-4c77-aeb3-84a9ce4da070",
   "metadata": {},
   "source": [
    "## Neoplasms (C00-D48)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "id": "a666ed82-ac2b-425b-a532-740b30d95a9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter II, neoplasms: C00-C99 plus D00-D48 (D49-D99 are excluded).\n",
    "letters_numbers = [f'C{i:02d}' for i in range(100)] + [f'D{i:02d}' for i in range(49)]\n",
    "\n",
    "# NOTE(review): the rows shown by `two.head(3)` pair 'C08' with lung diseases,\n",
    "# which looks like a MeSH tree class rather than an ICD-10 code. If so, this\n",
    "# range selects more than neoplasms - confirm the coding scheme of diseaseClass.\n",
    "# Vectorised exact match on the whole value; NaN / non-string entries never match.\n",
    "two = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]\n",
    "\n",
    "# Write the subset to a CSV file in the working directory.\n",
    "two.to_csv('C00_D48_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "8a0e864b-3ac3-451b-bf85-25b2839879bc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15066"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(two)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "684e1c0f-822c-40e8-aca1-f5365a31ebae",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gene_ID</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Disease_ID</th>\n",
       "      <th>Disease</th>\n",
       "      <th>Y</th>\n",
       "      <th>diseaseClass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1832</th>\n",
       "      <td>2</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>C0024115</td>\n",
       "      <td>Lung diseases: A non-neoplastic or neoplastic ...</td>\n",
       "      <td>0.32</td>\n",
       "      <td>C08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1833</th>\n",
       "      <td>151</td>\n",
       "      <td>MDHQDPYSVQATAAIAAAITFLILFTIFGNALVILAVLTSRSLRAP...</td>\n",
       "      <td>C0024115</td>\n",
       "      <td>Lung diseases: A non-neoplastic or neoplastic ...</td>\n",
       "      <td>0.32</td>\n",
       "      <td>C08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1834</th>\n",
       "      <td>976</td>\n",
       "      <td>MGGRVFLAFCVWLTLPGAETQDSRGCARWCPQNSSCVNATACRCNP...</td>\n",
       "      <td>C0024115</td>\n",
       "      <td>Lung diseases: A non-neoplastic or neoplastic ...</td>\n",
       "      <td>0.30</td>\n",
       "      <td>C08</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Gene_ID                                               Gene Disease_ID  \\\n",
       "1832        2  MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...   C0024115   \n",
       "1833      151  MDHQDPYSVQATAAIAAAITFLILFTIFGNALVILAVLTSRSLRAP...   C0024115   \n",
       "1834      976  MGGRVFLAFCVWLTLPGAETQDSRGCARWCPQNSSCVNATACRCNP...   C0024115   \n",
       "\n",
       "                                                Disease     Y diseaseClass  \n",
       "1832  Lung diseases: A non-neoplastic or neoplastic ...  0.32          C08  \n",
       "1833  Lung diseases: A non-neoplastic or neoplastic ...  0.32          C08  \n",
       "1834  Lung diseases: A non-neoplastic or neoplastic ...  0.30          C08  "
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "two.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3499a6d6-c725-447f-9f82-b3cb83497825",
   "metadata": {},
   "source": [
    "### Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism\n",
    "(D50-D89)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "477cdc68-465b-4ac0-a8aa-25f839cae1d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter III, blood / blood-forming organs / immune mechanism: D50-D89.\n",
    "letters_numbers = [f'D{i:02d}' for i in range(50, 90)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "three = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]\n",
    "\n",
    "# Optional export, left disabled:\n",
    "# three.to_csv('D50_D89_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "4043467f-df35-4e32-8635-cacfda794686",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(three)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "809230b5-56d2-4e4b-9554-20bd8f34ca46",
   "metadata": {},
   "source": [
    "### Endocrine, nutritional and metabolic diseases\n",
    "(E00-E90)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "8e411f94-e2b3-412b-85ce-a035bd328494",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter IV, endocrine, nutritional and metabolic diseases: E00-E90.\n",
    "letters_numbers = [f'E{i:02d}' for i in range(91)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "four = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]\n",
    "\n",
    "# Optional export, left disabled:\n",
    "# four.to_csv('E00_E90_diseases.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "11295f3c-a585-4305-b2d0-3adcac29d5d3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(four)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f746557e-ec08-4347-b147-09cff28d8e72",
   "metadata": {},
   "source": [
    "### Mental and behavioural disorders\n",
    "(F00-F99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "id": "ef35729c-2afe-4302-9a7c-0b5d1cadd1c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter V, mental and behavioural disorders: F00-F99.\n",
    "letters_numbers = [f'F{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "five = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]\n",
    "\n",
    "# Write this chapter's subset (`five`) to its own file; exporting `two` here\n",
    "# would store the C00-D48 rows under the F00-F99 file name.\n",
    "five.to_csv('F00_F99_diseases.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "id": "be476cc3-8c57-4d24-814f-047fc506f4d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3467"
      ]
     },
     "execution_count": 231,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(five)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55d6eac2-cc0d-4f00-b038-63763b4dea61",
   "metadata": {},
   "source": [
    "### Diseases of the nervous system\n",
    "(G00-G99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "6e061af5-29f7-439a-8633-56996bcaadb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter VI, diseases of the nervous system: G00-G99.\n",
    "letters_numbers = [f'G{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "six = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "ddc1f024-9b75-4e20-9e29-a49a19cb47b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(six)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4eff30f3-7e75-45cb-bed9-2f3bbc937339",
   "metadata": {},
   "source": [
    "### Diseases of the eye and adnexa\n",
    "(H00-H59)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "d6d34335-617f-4e42-90fd-6f6ea7a27b2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter VII, diseases of the eye and adnexa: H00-H59.\n",
    "letters_numbers = [f'H{i:02d}' for i in range(60)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "seven = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "5c8cb0db-5e45-4f53-8772-f4798f78f5bc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(seven)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8599947e-8783-4bd6-affe-438419eadb45",
   "metadata": {},
   "source": [
    "### Diseases of the ear and mastoid process\n",
    "(H60-H95)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "61d47f50-c727-42b7-a240-2cdb932b04b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter VIII, diseases of the ear and mastoid process: H60-H95.\n",
    "letters_numbers = [f'H{i:02d}' for i in range(60, 96)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "eight = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "aebabe47-2d8c-4789-a0b0-e965634afa22",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(eight)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5e5c746-608e-4919-9eeb-e2a125ed61b9",
   "metadata": {},
   "source": [
    "### Diseases of the circulatory system\n",
    "(I00-I99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "aebdc6fc-c72b-4787-84cb-2dcb6239e8c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter IX, diseases of the circulatory system: I00-I99.\n",
    "letters_numbers = [f'I{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "nine = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "d231eda1-64e9-4c18-bdf3-b9b2082646e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(nine)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc82bbab-2372-456c-8287-7e29263ab20f",
   "metadata": {},
   "source": [
    "### Diseases of the respiratory system\n",
    "(J00-J99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "1dc638f2-54ec-4833-a12a-51a180883097",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter X, diseases of the respiratory system: J00-J99.\n",
    "letters_numbers = [f'J{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "ten = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "441df9f7-0a07-42d3-bcbf-106292367083",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(ten)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32998a66-8fc4-409d-b488-3b572d1c0479",
   "metadata": {},
   "source": [
    "### Diseases of the digestive system\n",
    "(K00-K93)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "61d2f0a4-59ce-47e9-8f97-c23e94fb6b43",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XI, diseases of the digestive system: K00-K93.\n",
    "# The prefix must be 'K'; a 'J' prefix would re-select respiratory codes.\n",
    "letters_numbers = [f'K{i:02d}' for i in range(94)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "eleven = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "90c038c0-c702-4cec-b974-5b761e32786b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(eleven)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7648b37-d292-47a7-a693-7957a56ac630",
   "metadata": {},
   "source": [
    "### Diseases of the skin and subcutaneous tissue\n",
    "(L00-L99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "81ef1dd8-9f7f-4aee-a9da-72aa061892f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XII, diseases of the skin and subcutaneous tissue: L00-L99.\n",
    "letters_numbers = [f'L{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "twelve = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "84c14716-04ca-4f79-8d3a-77a56ad768da",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(twelve)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8e7871d3-a952-4c5b-b504-029874d22e1f",
   "metadata": {},
   "source": [
    "### Diseases of the musculoskeletal system and connective tissue\n",
    "(M00-M99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "9efa5707-509e-4417-8624-74959b60b396",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XIII, musculoskeletal system and connective tissue: M00-M99.\n",
    "letters_numbers = [f'M{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "thirteen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "483fdb00-3b55-408d-9a74-6c7b7adb68a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(thirteen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "705906d3-f65b-4f24-8d06-00bad5495251",
   "metadata": {},
   "source": [
    "### Diseases of the genitourinary system\n",
    "(N00-N99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "f98da88c-2dc2-4a6a-8446-0d04ea0c9a81",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XIV, diseases of the genitourinary system: N00-N99.\n",
    "letters_numbers = [f'N{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "fourteen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "5a9ed2b8-c678-4ece-89c5-672d28b310be",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(fourteen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f2869f28-ae08-4456-95a1-d871f369ca5a",
   "metadata": {},
   "source": [
    "### Pregnancy, childbirth and the puerperium\n",
    "(O00-O99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "9dedd790-02fa-47ae-9eab-daa36aacd47a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XV, pregnancy, childbirth and the puerperium: O00-O99.\n",
    "letters_numbers = [f'O{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "# The result keeps its existing name (`fiveteen`) because the next cell reads it.\n",
    "fiveteen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "61c5fffa-be76-4d0f-b7ff-81a2e9d4f68c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(fiveteen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42b83de0-0b8e-4dfe-981c-599e95981b70",
   "metadata": {},
   "source": [
    "### Certain conditions originating in the perinatal period\n",
    "(P00-P96)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "31203426-fdc2-4956-bb1f-b728e951adab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XVI, conditions originating in the perinatal period: P00-P96.\n",
    "# The range stops at 96 so that P97-P99 are not included.\n",
    "letters_numbers = [f'P{i:02d}' for i in range(97)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "sixteen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "99167bcf-78a6-45f8-9f30-884ebed113fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(sixteen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f58f4f1d-41a6-4c1d-89ec-34119441fce5",
   "metadata": {},
   "source": [
    "### Congenital malformations, deformations and chromosomal abnormalities\n",
    "(Q00-Q99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "d3e210ca-c614-4f8c-ae52-d7ea8d57789a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XVII, congenital malformations, deformations and chromosomal\n",
    "# abnormalities: Q00-Q99.\n",
    "letters_numbers = [f'Q{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "seventeen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "24fa884b-ee81-456d-b448-885db1e9cec0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(seventeen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44bb066a-f084-474c-8ea3-3ee3bc4f6488",
   "metadata": {},
   "source": [
    "### Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified\n",
    "(R00-R99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "18d9cc38-09b7-40f4-900e-e1c36f8f4434",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XVIII, symptoms, signs and abnormal findings: R00-R99.\n",
    "letters_numbers = [f'R{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "eighteen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "id": "ab71af36-d0d9-44fc-b51f-c997a65da3d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(eighteen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c73c1b47-174b-4eaa-836b-e9cded6a5732",
   "metadata": {},
   "source": [
    "### Injury, poisoning and certain other consequences of external causes\n",
    "(S00-T98)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "ae80ac24-b347-449a-bfe8-f897640c65d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XIX, injury, poisoning and other consequences of external\n",
    "# causes: S00-S99 plus T00-T98 (T99 is outside the chapter).\n",
    "letters_numbers = [f'S{i:02d}' for i in range(100)] + [f'T{i:02d}' for i in range(99)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "nineteen = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "88ce65dc-1026-4f73-b8c2-b8e14105f8f2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(nineteen)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b51b86fc-e924-4d96-b59d-e4d10f5c6d24",
   "metadata": {},
   "source": [
    "### External causes of morbidity and mortality\n",
    "(V01-Y98)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "e5d7c967-f9a9-44d1-89fe-107e96b85d97",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XX, external causes of morbidity and mortality: V01-Y98.\n",
    "# The chapter spans four letters: V01-V99, all of W and X, and Y00-Y98.\n",
    "letters_numbers = (\n",
    "    [f'V{i:02d}' for i in range(1, 100)]\n",
    "    + [f'{letter}{i:02d}' for letter in ('W', 'X') for i in range(100)]\n",
    "    + [f'Y{i:02d}' for i in range(99)]\n",
    ")\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "twenty = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "0c60c6d1-9136-4244-998d-c619f9cbca6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(twenty)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "212a768f-0a47-447a-b3bb-da1a94ba02c4",
   "metadata": {},
   "source": [
    "### Factors influencing health status and contact with health services\n",
    "(Z00-Z99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "ff4f719e-3256-4d7f-9d79-9bbaabf7af7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XXI, factors influencing health status and contact with\n",
    "# health services: Z00-Z99.\n",
    "letters_numbers = [f'Z{i:02d}' for i in range(100)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "twenty_one = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "2f267af3-f189-4e40-9cd5-5a666dd77009",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(twenty_one)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5989089-0852-4df9-a081-d0b58150a9ad",
   "metadata": {},
   "source": [
    "### Codes for special purposes\n",
    "(U00-U85)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "538139eb-bf81-4548-af4d-cc6fff8327dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICD-10 chapter XXII, codes for special purposes: U00-U85.\n",
    "# The range stops at 85 so that U86-U99 are not included.\n",
    "letters_numbers = [f'U{i:02d}' for i in range(86)]\n",
    "\n",
    "# Vectorised exact match on the whole diseaseClass value; NaN and other\n",
    "# non-string entries never match.\n",
    "twenty_two = merged_df[merged_df['diseaseClass'].isin(letters_numbers)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "1f66118e-9335-4dd2-b324-03f131c24bbf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(twenty_two)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3406771e-db74-4ad1-946d-e84661139914",
   "metadata": {},
   "source": [
    "### Load all DisGeNET GDA data\n",
    "> all_gene_disease_associations.tsv can be obtained from https://www.disgenet.org/downloads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "4d2611dc-9e3e-4666-a1e8-7314683d7f13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>geneId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>DSI</th>\n",
       "      <th>DPI</th>\n",
       "      <th>PLI</th>\n",
       "      <th>protein_class_name</th>\n",
       "      <th>protein_class</th>\n",
       "      <th>NofDiseases</th>\n",
       "      <th>NofPmids</th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseName</th>\n",
       "      <th>diseaseType</th>\n",
       "      <th>diseaseClass</th>\n",
       "      <th>diseaseSemanticType</th>\n",
       "      <th>NofGenes</th>\n",
       "      <th>NofPmids.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>4.991700e-09</td>\n",
       "      <td>Receptor</td>\n",
       "      <td>DTO_05007575</td>\n",
       "      <td>27.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>C0000727</td>\n",
       "      <td>Abdomen, Acute</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C23</td>\n",
       "      <td>Sign or Symptom</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>A2M</td>\n",
       "      <td>0.529</td>\n",
       "      <td>0.769</td>\n",
       "      <td>4.522900e-11</td>\n",
       "      <td>Enzyme modulator</td>\n",
       "      <td>DTO_05007584</td>\n",
       "      <td>147.0</td>\n",
       "      <td>145.0</td>\n",
       "      <td>C0000729</td>\n",
       "      <td>Abdominal Cramps</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C16</td>\n",
       "      <td>Sign or Symptom</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>A2MP1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>C0000731</td>\n",
       "      <td>Abdomen distended</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C06</td>\n",
       "      <td>Finding</td>\n",
       "      <td>103</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>NAT1</td>\n",
       "      <td>0.536</td>\n",
       "      <td>0.846</td>\n",
       "      <td>1.929400e-14</td>\n",
       "      <td>Enzyme</td>\n",
       "      <td>DTO_05007624</td>\n",
       "      <td>133.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>C0000734</td>\n",
       "      <td>Abdominal mass</td>\n",
       "      <td>phenotype</td>\n",
       "      <td>C06</td>\n",
       "      <td>Finding</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10.0</td>\n",
       "      <td>NAT2</td>\n",
       "      <td>0.451</td>\n",
       "      <td>0.885</td>\n",
       "      <td>3.274400e-06</td>\n",
       "      <td>Enzyme</td>\n",
       "      <td>DTO_05007624</td>\n",
       "      <td>311.0</td>\n",
       "      <td>627.0</td>\n",
       "      <td>C0000735</td>\n",
       "      <td>Abdominal Neoplasms</td>\n",
       "      <td>group</td>\n",
       "      <td>C04</td>\n",
       "      <td>Neoplastic Process</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   geneId geneSymbol    DSI    DPI           PLI protein_class_name  \\\n",
       "0     1.0       A1BG  0.700  0.538  4.991700e-09           Receptor   \n",
       "1     2.0        A2M  0.529  0.769  4.522900e-11   Enzyme modulator   \n",
       "2     3.0      A2MP1    NaN    NaN           NaN                NaN   \n",
       "3     9.0       NAT1  0.536  0.846  1.929400e-14             Enzyme   \n",
       "4    10.0       NAT2  0.451  0.885  3.274400e-06             Enzyme   \n",
       "\n",
       "  protein_class  NofDiseases  NofPmids diseaseId          diseaseName  \\\n",
       "0  DTO_05007575         27.0      20.0  C0000727       Abdomen, Acute   \n",
       "1  DTO_05007584        147.0     145.0  C0000729     Abdominal Cramps   \n",
       "2           NaN          1.0       1.0  C0000731    Abdomen distended   \n",
       "3  DTO_05007624        133.0     184.0  C0000734       Abdominal mass   \n",
       "4  DTO_05007624        311.0     627.0  C0000735  Abdominal Neoplasms   \n",
       "\n",
       "  diseaseType diseaseClass diseaseSemanticType  NofGenes  NofPmids.1  \n",
       "0   phenotype          C23     Sign or Symptom         2           2  \n",
       "1   phenotype          C16     Sign or Symptom         1           1  \n",
       "2   phenotype          C06             Finding       103           0  \n",
       "3   phenotype          C06             Finding         2           0  \n",
       "4       group          C04  Neoplastic Process        13          13  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the gene-disease association table from the working directory.\n",
    "# NOTE(review): despite the .tsv extension the file is parsed with read_csv's\n",
    "# default comma separator; the displayed head shows separate columns, so this\n",
    "# copy is presumably comma-delimited - confirm against the downloaded file.\n",
    "all_gda_df = pd.read_csv(\"all_gene_disease_associations.tsv\")\n",
    "# Preview the first five rows.\n",
    "all_gda_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "973bbff1-2ea7-4aca-805f-9c73aa7aef0d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>geneId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>DSI</th>\n",
       "      <th>DPI</th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseName</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>C0000727</td>\n",
       "      <td>Abdomen, Acute</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>A2M</td>\n",
       "      <td>0.529</td>\n",
       "      <td>0.769</td>\n",
       "      <td>C0000729</td>\n",
       "      <td>Abdominal Cramps</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>A2MP1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C0000731</td>\n",
       "      <td>Abdomen distended</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>NAT1</td>\n",
       "      <td>0.536</td>\n",
       "      <td>0.846</td>\n",
       "      <td>C0000734</td>\n",
       "      <td>Abdominal mass</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10.0</td>\n",
       "      <td>NAT2</td>\n",
       "      <td>0.451</td>\n",
       "      <td>0.885</td>\n",
       "      <td>C0000735</td>\n",
       "      <td>Abdominal Neoplasms</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   geneId geneSymbol    DSI    DPI diseaseId          diseaseName\n",
       "0     1.0       A1BG  0.700  0.538  C0000727       Abdomen, Acute\n",
       "1     2.0        A2M  0.529  0.769  C0000729     Abdominal Cramps\n",
       "2     3.0      A2MP1    NaN    NaN  C0000731    Abdomen distended\n",
       "3     9.0       NAT1  0.536  0.846  C0000734       Abdominal mass\n",
       "4    10.0       NAT2  0.451  0.885  C0000735  Abdominal Neoplasms"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_gda_df = all_gda_df[\n",
    "    [\"geneId\", \"geneSymbol\", \"DSI\", \"DPI\", \"diseaseId\", \"diseaseName\"]\n",
    "]  # Get needed columns\n",
    "all_gda_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "138de5a6-6390-4d75-8720-b876810bbac5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(21666, 30170)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_gda_df.geneSymbol.nunique(), all_gda_df.diseaseId.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9fcaf690-024e-4f67-a8ca-36a51905286c",
   "metadata": {},
   "source": [
    "### Create disease to disease_description mapping from MGDEF of DisGenNet\n",
    "> MGDEF.csv can be obtained from https://ftp.ncbi.nlm.nih.gov/pub/medgen/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5c88de5e-d785-4389-a6c8-9e3cdc3bd2da",
   "metadata": {},
   "outputs": [],
   "source": [
    "disease_def_df = pd.read_csv(\"MGDEF.csv\")\n",
    "disease_def_df.head()\n",
    "disease_des_dict = {}\n",
    "for _, item in all_gda_df.iterrows():\n",
    "    disease_des_dict[item[\"diseaseId\"]] = item[\n",
    "        \"diseaseName\"\n",
    "    ]  ## Default disease description is disease name\n",
    "for _, item in disease_def_df.iterrows():\n",
    "    disease_des_dict[item[\"CUI\"]] = item[\"DEF\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8efafc43-3622-484e-b17d-f2a3d29314bc",
   "metadata": {},
   "source": [
    "### Map CUI to Disease Description"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "10f91e00-769f-4f41-af6b-703199b96815",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_gda_df[\"diseaseDes\"] = all_gda_df[\"diseaseId\"].map(lambda x: disease_des_dict[x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7f114cda-cb59-4ad8-8ffd-b79896144d0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open(\"gene_symbol.txt\",'w') as f: # This is a gene_symbol use to retrieval in https://www.uniprot.org/uploadlists/\n",
    "#     f.write(\" \".join(all_gda_df['geneSymbol'].unique()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed9e807c-68ad-4633-a5ca-21e2782db213",
   "metadata": {
    "tags": []
   },
   "source": [
    "### Create gene to gene_protein_seq mapping from STRING"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "b756cbce-705a-4c2b-8c19-2c89b29a708f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19563"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prot_info_df = pd.read_csv(\"9606.protein.info.v12.0.txt\", \"\\t\")\n",
    "prot_info_gene = prot_info_df[\"preferred_name\"].unique()\n",
    "prot_info_df[\"preferred_name\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "522cd3a5-ac8a-4a5d-a4fb-e23f648a50e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>#string_protein_id</th>\n",
       "      <th>preferred_name</th>\n",
       "      <th>protein_size</th>\n",
       "      <th>annotation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9606.ENSP00000000233</td>\n",
       "      <td>ARF5</td>\n",
       "      <td>180</td>\n",
       "      <td>ADP-ribosylation factor 5; GTP-binding protein...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9606.ENSP00000000412</td>\n",
       "      <td>M6PR</td>\n",
       "      <td>277</td>\n",
       "      <td>Cation-dependent mannose-6-phosphate receptor;...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9606.ENSP00000001008</td>\n",
       "      <td>FKBP4</td>\n",
       "      <td>459</td>\n",
       "      <td>Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     #string_protein_id preferred_name  protein_size  \\\n",
       "0  9606.ENSP00000000233           ARF5           180   \n",
       "1  9606.ENSP00000000412           M6PR           277   \n",
       "2  9606.ENSP00000001008          FKBP4           459   \n",
       "\n",
       "                                          annotation  \n",
       "0  ADP-ribosylation factor 5; GTP-binding protein...  \n",
       "1  Cation-dependent mannose-6-phosphate receptor;...  \n",
       "2  Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prot_info_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "5ab7ea22-2143-4430-8a48-54b950ca59bb",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "sequence item 5756: expected str instance, float found",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m/tmp/ipykernel_89/1478609678.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     10\u001b[0m     \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexist_gene\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"gene_exist_NOT_in_STRING.txt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"w\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m     \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnot_exist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     13\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnot_exist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexist_gene\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: sequence item 5756: expected str instance, float found"
     ]
    }
   ],
   "source": [
    "# Check overlapping between all_gda_df and string\n",
    "not_exist = []\n",
    "exist_gene = []\n",
    "for gene_name in all_gda_df[\"geneSymbol\"].unique():\n",
    "    if gene_name not in prot_info_gene:\n",
    "        not_exist.append(gene_name)\n",
    "    else:\n",
    "        exist_gene.append(gene_name)\n",
    "with open(\"gene_exist_in_STRING.txt\", \"w\") as f:\n",
    "    f.write(\" \".join(exist_gene))\n",
    "with open(\"gene_exist_NOT_in_STRING.txt\", \"w\") as f:\n",
    "    f.write(\" \".join(not_exist))\n",
    "len(not_exist), len(exist_gene)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79db8c43-432b-4dc6-b708-a86c29fdc18f",
   "metadata": {},
   "source": [
    "### Filter out all the association according to gene_exist_in_STRING.txt\n",
    "> 9606.protein.sequences.v11.5.fa can be obtained from https://string-db.org/cgi/download?sessionId=bznr4eHEqnC5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5164a97b-a9a0-4483-9944-d0bd6074276a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the number of gda: 1134942\n"
     ]
    }
   ],
   "source": [
    "print(f\"the number of gda: {len(all_gda_df.index)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "71bbfccc-bf3a-426c-aba9-5dce4c4acab1",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_gda_df_filtered = all_gda_df[\n",
    "    all_gda_df.geneSymbol.isin(exist_gene)\n",
    "]  ## Filter by exist_gene"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a9d85cf1-d79e-4ea4-8c14-4c0199e2e7ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# create protein to seqence mapping\n",
    "\n",
    "# with open(\"9606.protein.sequences.v11.5.fa\") as f:\n",
    "with open(\"gencode.v44.pc_translations.fa\") as f:\n",
    "    lines = f.readlines()\n",
    "    line_str = \"\".join(lines)\n",
    "id_seq_str_list = line_str.split(\">\")\n",
    "prot_id_to_seq = {}\n",
    "for id_seq_str in id_seq_str_list:\n",
    "    tokens = id_seq_str.split(\"\\n\")\n",
    "    key = tokens[0]\n",
    "    val = \"\".join(tokens[1:])\n",
    "    prot_id_to_seq[key] = val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e0bf097b-8bf1-4ee5-ab3f-fa615b5c131d",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Create gene to protein sequence mapping\n",
    "gene_to_seq_dict = {}\n",
    "no_seq_gene_list = (\n",
    "    []\n",
    ")  ## gene name and protein id cannot find in the prot String dataset\n",
    "multi_seq_gene_list = (\n",
    "    []\n",
    ")  ## gene name and protein id cannot find in the prot String dataset\n",
    "\n",
    "for _, item in prot_info_df.iterrows():\n",
    "    gene_name = item[\"preferred_name\"]\n",
    "    if gene_name in exist_gene:\n",
    "        protein_id = item[\"#string_protein_id\"]\n",
    "        gene_to_seq_dict[gene_name] = prot_id_to_seq[protein_id]\n",
    "        if (gene_name in gene_to_seq_dict) and (\n",
    "            prot_id_to_seq[protein_id] != gene_to_seq_dict[gene_name]\n",
    "        ):\n",
    "            multi_seq_gene_list.append(gene_name)\n",
    "    else:\n",
    "        no_seq_gene_list.append(gene_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "667b40ff-c223-4790-ae44-ea3e81ecc829",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3656, 0)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(no_seq_gene_list), len(multi_seq_gene_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "8b0cdc7a-b275-472d-89b6-f657ed6389f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15910"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "final_gene_name_list = gene_to_seq_dict.keys()\n",
    "all_gda_df_filtered = all_gda_df[all_gda_df.geneSymbol.isin(final_gene_name_list)]\n",
    "len(gene_to_seq_dict)  ## Final gene to seq mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "1082817e-c3b7-4d0e-826a-cd2e5b2482a4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_101603/1325574237.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  all_gda_df_filtered[\"proteinSeq\"] = all_gda_df_filtered[\"geneSymbol\"].apply(lambda x: gene_to_seq_dict[x])\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>geneId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>DSI</th>\n",
       "      <th>DPI</th>\n",
       "      <th>score</th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseName</th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>proteinSeq</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0001418</td>\n",
       "      <td>Adenocarcinoma</td>\n",
       "      <td>A common cancer characterized by the presence ...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0002736</td>\n",
       "      <td>Amyotrophic Lateral Sclerosis</td>\n",
       "      <td>A neurodegenerative disease characterized by p...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0003578</td>\n",
       "      <td>Apnea</td>\n",
       "      <td>Lack of breathing with no movement of the resp...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0003864</td>\n",
       "      <td>Arthritis</td>\n",
       "      <td>Inflammation of a joint.</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0008373</td>\n",
       "      <td>Cholesteatoma</td>\n",
       "      <td>Cholesteatoma is a benign but potentially dest...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1132776</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0344395</td>\n",
       "      <td>Bilirubin measurement</td>\n",
       "      <td>Bilirubin measurement</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1132777</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0410702</td>\n",
       "      <td>Adolescent idiopathic scoliosis</td>\n",
       "      <td>A scoliosis with no known cause arising in ado...</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1132778</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0489786</td>\n",
       "      <td>Height</td>\n",
       "      <td>Height</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1132779</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C1287365</td>\n",
       "      <td>Bilirubin level result</td>\n",
       "      <td>Bilirubin level result</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1132780</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C1837461</td>\n",
       "      <td>SCOLIOSIS, ISOLATED, SUSCEPTIBILITY TO, 3</td>\n",
       "      <td>Idiopathic scoliosis, an abnormality of the ve...</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1019926 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            geneId       geneSymbol    DSI    DPI  score diseaseId  \\\n",
       "0                1             A1BG  0.700  0.538   0.01  C0001418   \n",
       "1                1             A1BG  0.700  0.538   0.01  C0002736   \n",
       "2                1             A1BG  0.700  0.538   0.01  C0003578   \n",
       "3                1             A1BG  0.700  0.538   0.01  C0003864   \n",
       "4                1             A1BG  0.700  0.538   0.01  C0008373   \n",
       "...            ...              ...    ...    ...    ...       ...   \n",
       "1132776  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C0344395   \n",
       "1132777  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C0410702   \n",
       "1132778  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C0489786   \n",
       "1132779  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C1287365   \n",
       "1132780  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C1837461   \n",
       "\n",
       "                                       diseaseName  \\\n",
       "0                                   Adenocarcinoma   \n",
       "1                    Amyotrophic Lateral Sclerosis   \n",
       "2                                            Apnea   \n",
       "3                                        Arthritis   \n",
       "4                                    Cholesteatoma   \n",
       "...                                            ...   \n",
       "1132776                      Bilirubin measurement   \n",
       "1132777            Adolescent idiopathic scoliosis   \n",
       "1132778                                     Height   \n",
       "1132779                     Bilirubin level result   \n",
       "1132780  SCOLIOSIS, ISOLATED, SUSCEPTIBILITY TO, 3   \n",
       "\n",
       "                                                diseaseDes  \\\n",
       "0        A common cancer characterized by the presence ...   \n",
       "1        A neurodegenerative disease characterized by p...   \n",
       "2        Lack of breathing with no movement of the resp...   \n",
       "3                                 Inflammation of a joint.   \n",
       "4        Cholesteatoma is a benign but potentially dest...   \n",
       "...                                                    ...   \n",
       "1132776                              Bilirubin measurement   \n",
       "1132777  A scoliosis with no known cause arising in ado...   \n",
       "1132778                                             Height   \n",
       "1132779                             Bilirubin level result   \n",
       "1132780  Idiopathic scoliosis, an abnormality of the ve...   \n",
       "\n",
       "                                                proteinSeq  \n",
       "0        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "1        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "2        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "3        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "4        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "...                                                    ...  \n",
       "1132776  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "1132777  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "1132778  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "1132779  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "1132780  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "\n",
       "[1019926 rows x 9 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# map gene to protein sequence\n",
    "all_gda_df_filtered[\"proteinSeq\"] = all_gda_df_filtered[\"geneSymbol\"].apply(\n",
    "    lambda x: gene_to_seq_dict[x]\n",
    ")\n",
    "all_gda_df_filtered"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "5dd33480-9196-47da-9c20-fe8f954360eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_gda_df_filtered.to_csv(\"disgenet_all.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9bd2ee3e-8ddb-4d75-a9f6-28d1cea2df60",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_full = pd.read_csv(\"disgenet_all.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a9b05483-7d18-4427-9d74-7b308f725110",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pair_idx(item, col1=\"geneId\", col2=\"diseaseId\"):\n",
    "    pair_idx = str(item[col1]).strip() + \"-\" + item[col2].strip() \n",
    "    return pair_idx\n",
    "df_full[\"pair_idx\"] = df_full.apply(get_pair_idx, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e9246389-7131-4cc0-897c-8d326aa441a6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Unnamed: 0.1</th>\n",
       "      <th>geneId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>DSI</th>\n",
       "      <th>DPI</th>\n",
       "      <th>score</th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseName</th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>proteinSeq</th>\n",
       "      <th>pair_idx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0001418</td>\n",
       "      <td>Adenocarcinoma</td>\n",
       "      <td>A common cancer characterized by the presence ...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>1-C0001418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0002736</td>\n",
       "      <td>Amyotrophic Lateral Sclerosis</td>\n",
       "      <td>A neurodegenerative disease characterized by p...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>1-C0002736</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0003578</td>\n",
       "      <td>Apnea</td>\n",
       "      <td>Lack of breathing with no movement of the resp...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>1-C0003578</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0003864</td>\n",
       "      <td>Arthritis</td>\n",
       "      <td>Inflammation of a joint.</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>1-C0003864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0008373</td>\n",
       "      <td>Cholesteatoma</td>\n",
       "      <td>Cholesteatoma is a benign but potentially dest...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>1-C0008373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018350</th>\n",
       "      <td>1019922</td>\n",
       "      <td>1132776</td>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0344395</td>\n",
       "      <td>Bilirubin measurement</td>\n",
       "      <td>Bilirubin measurement</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "      <td>115072896-C0344395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018351</th>\n",
       "      <td>1019923</td>\n",
       "      <td>1132777</td>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0410702</td>\n",
       "      <td>Adolescent idiopathic scoliosis</td>\n",
       "      <td>A scoliosis with no known cause arising in ado...</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "      <td>115072896-C0410702</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018352</th>\n",
       "      <td>1019924</td>\n",
       "      <td>1132778</td>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0489786</td>\n",
       "      <td>Height</td>\n",
       "      <td>Height</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "      <td>115072896-C0489786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018353</th>\n",
       "      <td>1019925</td>\n",
       "      <td>1132779</td>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C1287365</td>\n",
       "      <td>Bilirubin level result</td>\n",
       "      <td>Bilirubin level result</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "      <td>115072896-C1287365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018354</th>\n",
       "      <td>1019926</td>\n",
       "      <td>1132780</td>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C1837461</td>\n",
       "      <td>SCOLIOSIS, ISOLATED, SUSCEPTIBILITY TO, 3</td>\n",
       "      <td>Idiopathic scoliosis, an abnormality of the ve...</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "      <td>115072896-C1837461</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1018355 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Unnamed: 0  Unnamed: 0.1     geneId       geneSymbol    DSI    DPI  \\\n",
       "0                 0             0          1             A1BG  0.700  0.538   \n",
       "1                 1             1          1             A1BG  0.700  0.538   \n",
       "2                 2             2          1             A1BG  0.700  0.538   \n",
       "3                 3             3          1             A1BG  0.700  0.538   \n",
       "4                 4             4          1             A1BG  0.700  0.538   \n",
       "...             ...           ...        ...              ...    ...    ...   \n",
       "1018350     1019922       1132776  115072896  SLCO1B3-SLCO1B7  0.839  0.077   \n",
       "1018351     1019923       1132777  115072896  SLCO1B3-SLCO1B7  0.839  0.077   \n",
       "1018352     1019924       1132778  115072896  SLCO1B3-SLCO1B7  0.839  0.077   \n",
       "1018353     1019925       1132779  115072896  SLCO1B3-SLCO1B7  0.839  0.077   \n",
       "1018354     1019926       1132780  115072896  SLCO1B3-SLCO1B7  0.839  0.077   \n",
       "\n",
       "         score diseaseId                                diseaseName  \\\n",
       "0         0.01  C0001418                             Adenocarcinoma   \n",
       "1         0.01  C0002736              Amyotrophic Lateral Sclerosis   \n",
       "2         0.01  C0003578                                      Apnea   \n",
       "3         0.01  C0003864                                  Arthritis   \n",
       "4         0.01  C0008373                              Cholesteatoma   \n",
       "...        ...       ...                                        ...   \n",
       "1018350   0.10  C0344395                      Bilirubin measurement   \n",
       "1018351   0.10  C0410702            Adolescent idiopathic scoliosis   \n",
       "1018352   0.10  C0489786                                     Height   \n",
       "1018353   0.10  C1287365                     Bilirubin level result   \n",
       "1018354   0.10  C1837461  SCOLIOSIS, ISOLATED, SUSCEPTIBILITY TO, 3   \n",
       "\n",
       "                                                diseaseDes  \\\n",
       "0        A common cancer characterized by the presence ...   \n",
       "1        A neurodegenerative disease characterized by p...   \n",
       "2        Lack of breathing with no movement of the resp...   \n",
       "3                                 Inflammation of a joint.   \n",
       "4        Cholesteatoma is a benign but potentially dest...   \n",
       "...                                                    ...   \n",
       "1018350                              Bilirubin measurement   \n",
       "1018351  A scoliosis with no known cause arising in ado...   \n",
       "1018352                                             Height   \n",
       "1018353                             Bilirubin level result   \n",
       "1018354  Idiopathic scoliosis, an abnormality of the ve...   \n",
       "\n",
       "                                                proteinSeq            pair_idx  \n",
       "0        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...          1-C0001418  \n",
       "1        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...          1-C0002736  \n",
       "2        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...          1-C0003578  \n",
       "3        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...          1-C0003864  \n",
       "4        MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...          1-C0008373  \n",
       "...                                                    ...                 ...  \n",
       "1018350  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  115072896-C0344395  \n",
       "1018351  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  115072896-C0410702  \n",
       "1018352  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  115072896-C0489786  \n",
       "1018353  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  115072896-C1287365  \n",
       "1018354  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  115072896-C1837461  \n",
       "\n",
       "[1018355 rows x 12 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_full"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "7b5cd360-7f84-4895-87ac-4d176eef0981",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "def get_pair_idx(item, col1=\"ID1\", col2=\"ID2\"):\n",
    "    pair_idx = str(item[col1]).strip() + \"-\" + item[col2].strip() \n",
    "    return pair_idx\n",
    "df = pd.read_csv(\"disgenet.csv\")\n",
    "df[\"pair_idx\"] = df.apply(get_pair_idx, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "4f0b95a5-190b-44c4-a0ec-eda2d382ef44",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>ID1</th>\n",
       "      <th>X1</th>\n",
       "      <th>ID2</th>\n",
       "      <th>X2</th>\n",
       "      <th>Y</th>\n",
       "      <th>pair_idx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>C0019209</td>\n",
       "      <td>Hepatomegaly: Abnormal enlargement of the liver.</td>\n",
       "      <td>0.30</td>\n",
       "      <td>1-C0019209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "      <td>C0036341</td>\n",
       "      <td>Schizophrenia: Schizophrenia is highly heritab...</td>\n",
       "      <td>0.30</td>\n",
       "      <td>1-C0036341</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>C0002395</td>\n",
       "      <td>Alzheimer's Disease: Alzheimer disease is the ...</td>\n",
       "      <td>0.50</td>\n",
       "      <td>2-C0002395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>C0007102</td>\n",
       "      <td>Malignant tumor of colon: A primary or metasta...</td>\n",
       "      <td>0.31</td>\n",
       "      <td>2-C0007102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...</td>\n",
       "      <td>C0009375</td>\n",
       "      <td>Colonic Neoplasms: A benign or malignant neopl...</td>\n",
       "      <td>0.30</td>\n",
       "      <td>2-C0009375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52471</th>\n",
       "      <td>52471</td>\n",
       "      <td>100507436</td>\n",
       "      <td>MGLGPVFLLLAGIFPFAPPGAAAEPHSLRYNLTVLSWDGSVQSGFL...</td>\n",
       "      <td>C0029172</td>\n",
       "      <td>Oral Submucous Fibrosis: Irreversible FIBROSIS...</td>\n",
       "      <td>0.31</td>\n",
       "      <td>100507436-C0029172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52472</th>\n",
       "      <td>52472</td>\n",
       "      <td>100507436</td>\n",
       "      <td>MGLGPVFLLLAGIFPFAPPGAAAEPHSLRYNLTVLSWDGSVQSGFL...</td>\n",
       "      <td>C2239176</td>\n",
       "      <td>Liver carcinoma: Hepatocellular carcinoma is t...</td>\n",
       "      <td>0.40</td>\n",
       "      <td>100507436-C2239176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52473</th>\n",
       "      <td>52473</td>\n",
       "      <td>100820829</td>\n",
       "      <td>MLRSTSTVTLLSGGAARTPGAPSRRANVCRLRLTVPPESPVPEQCE...</td>\n",
       "      <td>C0087031</td>\n",
       "      <td>Juvenile-Onset Still Disease: An inflammatory ...</td>\n",
       "      <td>0.30</td>\n",
       "      <td>100820829-C0087031</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52474</th>\n",
       "      <td>52474</td>\n",
       "      <td>100820829</td>\n",
       "      <td>MLRSTSTVTLLSGGAARTPGAPSRRANVCRLRLTVPPESPVPEQCE...</td>\n",
       "      <td>C3495559</td>\n",
       "      <td>Juvenile arthritis: Juvenile arthritis (JUVAR)...</td>\n",
       "      <td>0.30</td>\n",
       "      <td>100820829-C3495559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52475</th>\n",
       "      <td>52475</td>\n",
       "      <td>100820829</td>\n",
       "      <td>MLRSTSTVTLLSGGAARTPGAPSRRANVCRLRLTVPPESPVPEQCE...</td>\n",
       "      <td>C3714758</td>\n",
       "      <td>Juvenile psoriatic arthritis: Childhood arthri...</td>\n",
       "      <td>0.30</td>\n",
       "      <td>100820829-C3714758</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>52476 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0        ID1  \\\n",
       "0               0          1   \n",
       "1               1          1   \n",
       "2               2          2   \n",
       "3               3          2   \n",
       "4               4          2   \n",
       "...           ...        ...   \n",
       "52471       52471  100507436   \n",
       "52472       52472  100507436   \n",
       "52473       52473  100820829   \n",
       "52474       52474  100820829   \n",
       "52475       52475  100820829   \n",
       "\n",
       "                                                      X1       ID2  \\\n",
       "0      MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  C0019209   \n",
       "1      MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  C0036341   \n",
       "2      MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...  C0002395   \n",
       "3      MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...  C0007102   \n",
       "4      MGKNKLLHPSLVLLLLVLLPTDASVSGKPQYMVLVPSLLHTETTEK...  C0009375   \n",
       "...                                                  ...       ...   \n",
       "52471  MGLGPVFLLLAGIFPFAPPGAAAEPHSLRYNLTVLSWDGSVQSGFL...  C0029172   \n",
       "52472  MGLGPVFLLLAGIFPFAPPGAAAEPHSLRYNLTVLSWDGSVQSGFL...  C2239176   \n",
       "52473  MLRSTSTVTLLSGGAARTPGAPSRRANVCRLRLTVPPESPVPEQCE...  C0087031   \n",
       "52474  MLRSTSTVTLLSGGAARTPGAPSRRANVCRLRLTVPPESPVPEQCE...  C3495559   \n",
       "52475  MLRSTSTVTLLSGGAARTPGAPSRRANVCRLRLTVPPESPVPEQCE...  C3714758   \n",
       "\n",
       "                                                      X2     Y  \\\n",
       "0       Hepatomegaly: Abnormal enlargement of the liver.  0.30   \n",
       "1      Schizophrenia: Schizophrenia is highly heritab...  0.30   \n",
       "2      Alzheimer's Disease: Alzheimer disease is the ...  0.50   \n",
       "3      Malignant tumor of colon: A primary or metasta...  0.31   \n",
       "4      Colonic Neoplasms: A benign or malignant neopl...  0.30   \n",
       "...                                                  ...   ...   \n",
       "52471  Oral Submucous Fibrosis: Irreversible FIBROSIS...  0.31   \n",
       "52472  Liver carcinoma: Hepatocellular carcinoma is t...  0.40   \n",
       "52473  Juvenile-Onset Still Disease: An inflammatory ...  0.30   \n",
       "52474  Juvenile arthritis: Juvenile arthritis (JUVAR)...  0.30   \n",
       "52475  Juvenile psoriatic arthritis: Childhood arthri...  0.30   \n",
       "\n",
       "                 pair_idx  \n",
       "0              1-C0019209  \n",
       "1              1-C0036341  \n",
       "2              2-C0002395  \n",
       "3              2-C0007102  \n",
       "4              2-C0009375  \n",
       "...                   ...  \n",
       "52471  100507436-C0029172  \n",
       "52472  100507436-C2239176  \n",
       "52473  100820829-C0087031  \n",
       "52474  100820829-C3495559  \n",
       "52475  100820829-C3714758  \n",
       "\n",
       "[52476 rows x 7 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "962a4437-e3dd-4a86-a36a-78481e828c4e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Unnamed: 0', 'Unnamed: 0.1', 'geneId', 'geneSymbol', 'DSI', 'DPI',\n",
       "       'score', 'diseaseId', 'diseaseName', 'diseaseDes', 'proteinSeq',\n",
       "       'pair_idx'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_full.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "067db99d-75c1-496f-8ec9-65db73a35fe9",
   "metadata": {},
   "outputs": [],
   "source": [
    "pair_idx = df.pair_idx.to_numpy()\n",
    "df_full = df_full[~df_full.pair_idx.isin(pair_idx)][['geneId', 'geneSymbol', 'DSI', 'DPI',\n",
    "       'score', 'diseaseId', 'diseaseName', 'diseaseDes', 'proteinSeq']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "f1a4d89f-84bb-4e80-a080-83204bf701ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_full.to_csv(\"disgenet_all.csv\",index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c01387f1-5892-497a-8d09-9c0502fde2a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/nfs/protbert/notebooks/data_processing\n"
     ]
    }
   ],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "99a0d6ee-faf6-4451-aefa-78f9b3a2406f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>geneId</th>\n",
       "      <th>geneSymbol</th>\n",
       "      <th>DSI</th>\n",
       "      <th>DPI</th>\n",
       "      <th>score</th>\n",
       "      <th>diseaseId</th>\n",
       "      <th>diseaseName</th>\n",
       "      <th>diseaseDes</th>\n",
       "      <th>proteinSeq</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0001418</td>\n",
       "      <td>Adenocarcinoma</td>\n",
       "      <td>A common cancer characterized by the presence ...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0002736</td>\n",
       "      <td>Amyotrophic Lateral Sclerosis</td>\n",
       "      <td>A neurodegenerative disease characterized by p...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0003578</td>\n",
       "      <td>Apnea</td>\n",
       "      <td>Lack of breathing with no movement of the resp...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0003864</td>\n",
       "      <td>Arthritis</td>\n",
       "      <td>Inflammation of a joint.</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.01</td>\n",
       "      <td>C0008373</td>\n",
       "      <td>Cholesteatoma</td>\n",
       "      <td>Cholesteatoma is a benign but potentially dest...</td>\n",
       "      <td>MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>966876</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0344395</td>\n",
       "      <td>Bilirubin measurement</td>\n",
       "      <td>Bilirubin measurement</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>966877</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0410702</td>\n",
       "      <td>Adolescent idiopathic scoliosis</td>\n",
       "      <td>A scoliosis with no known cause arising in ado...</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>966878</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C0489786</td>\n",
       "      <td>Height</td>\n",
       "      <td>Height</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>966879</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C1287365</td>\n",
       "      <td>Bilirubin level result</td>\n",
       "      <td>Bilirubin level result</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>966880</th>\n",
       "      <td>115072896</td>\n",
       "      <td>SLCO1B3-SLCO1B7</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.077</td>\n",
       "      <td>0.10</td>\n",
       "      <td>C1837461</td>\n",
       "      <td>SCOLIOSIS, ISOLATED, SUSCEPTIBILITY TO, 3</td>\n",
       "      <td>Idiopathic scoliosis, an abnormality of the ve...</td>\n",
       "      <td>MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>966881 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           geneId       geneSymbol    DSI    DPI  score diseaseId  \\\n",
       "0               1             A1BG  0.700  0.538   0.01  C0001418   \n",
       "1               1             A1BG  0.700  0.538   0.01  C0002736   \n",
       "2               1             A1BG  0.700  0.538   0.01  C0003578   \n",
       "3               1             A1BG  0.700  0.538   0.01  C0003864   \n",
       "4               1             A1BG  0.700  0.538   0.01  C0008373   \n",
       "...           ...              ...    ...    ...    ...       ...   \n",
       "966876  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C0344395   \n",
       "966877  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C0410702   \n",
       "966878  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C0489786   \n",
       "966879  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C1287365   \n",
       "966880  115072896  SLCO1B3-SLCO1B7  0.839  0.077   0.10  C1837461   \n",
       "\n",
       "                                      diseaseName  \\\n",
       "0                                  Adenocarcinoma   \n",
       "1                   Amyotrophic Lateral Sclerosis   \n",
       "2                                           Apnea   \n",
       "3                                       Arthritis   \n",
       "4                                   Cholesteatoma   \n",
       "...                                           ...   \n",
       "966876                      Bilirubin measurement   \n",
       "966877            Adolescent idiopathic scoliosis   \n",
       "966878                                     Height   \n",
       "966879                     Bilirubin level result   \n",
       "966880  SCOLIOSIS, ISOLATED, SUSCEPTIBILITY TO, 3   \n",
       "\n",
       "                                               diseaseDes  \\\n",
       "0       A common cancer characterized by the presence ...   \n",
       "1       A neurodegenerative disease characterized by p...   \n",
       "2       Lack of breathing with no movement of the resp...   \n",
       "3                                Inflammation of a joint.   \n",
       "4       Cholesteatoma is a benign but potentially dest...   \n",
       "...                                                   ...   \n",
       "966876                              Bilirubin measurement   \n",
       "966877  A scoliosis with no known cause arising in ado...   \n",
       "966878                                             Height   \n",
       "966879                             Bilirubin level result   \n",
       "966880  Idiopathic scoliosis, an abnormality of the ve...   \n",
       "\n",
       "                                               proteinSeq  \n",
       "0       MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "1       MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "2       MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "3       MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "4       MSMLVVFLLLWGVTWGPVTEAAIFYETQPSLWAESESLLKPLANVT...  \n",
       "...                                                   ...  \n",
       "966876  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "966877  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "966878  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "966879  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "966880  MDQHQHLNKTAESASSEKKKTRRCNGFKMFLAALSFSYIAKALGGI...  \n",
       "\n",
       "[966881 rows x 9 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df_full = pd.read_csv(\"../../data/pretrain/disgenet_gda.csv\")\n",
    "df_full"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e893b37b-7d6b-4aa4-b53a-613cc32bb1e2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/anaconda3/lib/python3.9/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAirElEQVR4nO3de5RcZZnv8e+zq6pvSciFdGK4GUBQGS9BWmDGcUZFFNEjusZZDiqi4xhnvCycmXPOcFxnjTqeNUfPGUfnOMsLChrvqDhykVGQi4jKpQMhBAIEuUYC6QAhJOmku2s/54+9d3V1d3W6urt2VaXe32etXlW16/K+O5ennn7ed7+vuTsiIhKOqNUdEBGR5lLgFxEJjAK/iEhgFPhFRAKjwC8iEphiqztQj+XLl/vq1atb3Q0RkYPK+vXrd7h7/+TjB0XgX716NYODg63uhojIQcXMHq51XKUeEZHAKPCLiARGgV9EJDAK/CIigVHgFxEJjAK/iEhgFPhFRAKjwC8iEpigAr+7c8bnb+A/bt/a6q6IiLRMUIF/z0iZex5/lvu37251V0REWiaowP/M8CgA5bjFHRERaaGwAv/eJPDH2m5SRAIWVuBPM/6xsgK/iIQrsMA/AijjF5GwBRb404w/VpFfRMIVZODX4K6IhCzIwB/HKvWISLhyD/xmVjCz283sivTxMjO72sy2pLdL8+5DZrzUo8AvIuFqRsZ/HrC56vH5wDXufhxwTfq4KZ4ZHgM0uCsiYcs18JvZEcAbga9VHT4LWJfeXwe8Jc8+VFPGLyKSf8b/eeC/A9XDqSvdfRtAersi5z5UqMYvIpJj4DezNwHb3X39HN+/1swGzWxwaGioIX3apemcIiK5ZvyvAN5sZg8B3wdeY2bfBp4ws1UA6e32Wm929wvcfcDdB/r7+xvSIU3nFBHJMfC7+/9w9yPcfTXwF8C17v4u4DLg3PRl5wKX5tWHSf0ZL/VocFdEAtaKefyfBk43sy3A6enj3O0ZKVNOa/sa3BWRkBWb0Yi7Xw9cn95/EjitGe1Wy7J90OCuiIQtmCt3d+4dqdzX4K6IhCyYwJ9l/GaguC8iIQsm8GdTOZf2dSnjF5GgBRP4s4x/2YIutA+LiIQsvMDf16XBXREJWjCBf9fwGJHBIb1FTecUkaAFE/hH45hiIaIQmTJ+EQlaMIE/jp2CGcUo0uCuiAQtmMBfjqEQGVFkKOEXkZAFE/hjdyKDYmTK+EUkaGEF/siIzHQBl4gELZjAX67U+K2yWJuISIiCCfyVjD8yTecUkaAFE/izjL8QaT1+EQlbMIE/9mRWTzGKGNMWXCISsHACf+yYkQzuKuEXkYDludl6j5ndYmZ3mNldZvbJ9PgnzOz3ZrYh/Tkzrz5UK7snGX9B0zlFJGx57sC1H3iNu+82sxJwo5n9Z/rc59z9X3Jse4qsxq/pnCISutwCv7s7sDt9WEp/WlZkyWb1FCOjrMFdEQlYrjV+MyuY2QZgO3C1u9+cPvVhM9toZheZ2dJp3rvWzAbNbHBoaGjefYljiAyidB6/K/iLSKByDfzuXnb3NcARwMlm9iLgS8CxwBpgG/DZad57gbsPuPtAf3//vPtSdicyo2AGoAFeEQlWU2b1uPtO4HrgDHd/Iv1CiIGvAic3ow9xPD64C9pwXUTCleesnn4zW5Le7wVeC9xjZquqXvZWYFNefaiWzeqJsoxfcV9EApXnrJ5VwDozK5B8wfzA3a8ws2+Z2RqSgd6HgA/k2IeK2MHStXogy/gLzWhaRKSt5DmrZyNwYo3j5+TV5oEkG7Ekg7vJ41b0QkSk9YK5crec1fjTwK8pnSISqnACfzqrJ4o0uCsiYQsm8Hs6uFvQ4K6IBC6YwF+Ok4y/qIxfRAIXTuB3KhuxgDJ+EQlXMIE/m9WjwV0RCV0wgT+b1ZNl/GWl/CISqGACf+w+4QIubcIlIqEKKvBn6/GDBndFJFzBBP6s1FPQ
4K6IBC6YwB+ns3o0nVNEQhdQ4PfKRizZYxGREAUT+LM9dzW4KyKhCybwx3Gy564Gd0UkdHmux99Wyumsnuvu2Q7AL+7ezkM79laef8cpR7WqayIiTZXnDlw9ZnaLmd1hZneZ2SfT48vM7Goz25Le1txsvdGywd200qMav4gEK89Sz37gNe7+UpKN1c8ws1OB84Fr3P044Jr0ce7iOBncNdPgroiELbfA74nd6cNS+uPAWcC69Pg64C159aFaZc9dzeMXkcDlOrhrZgUz2wBsB65295uBle6+DSC9XTHNe9ea2aCZDQ4NDc27L9myzCr1iEjocg387l529zXAEcDJZvaiWbz3AncfcPeB/v7+BvSFJONPSz0K+yISqqZM53T3ncD1wBnAE2a2CiC93d6MPpQrNf7kcRwr9ItImPKc1dNvZkvS+73Aa4F7gMuAc9OXnQtcmlcfqpU9mcdf0OCuiAQuz3n8q4B1ZlYg+YL5gbtfYWa/BX5gZu8DHgH+PMc+VMTplbvjs3qa0aqISPvJLfC7+0bgxBrHnwROy6vd6VRm9VilH83ugohIWwhiyQZ3xz2Zw58N7mrrRREJVRCBPyvrJKWe5L7ivoiEKojAX04jfyFCg7siErwgAn8W5KNIg7siImEFftPgrohIEIG/Uuqx6rV6FPhFJExBBP5sQbak1JMea113RERaKojAn03dLBiV6ZzK+EUkVEEE/qzGX71Im+K+iIQqjMCfRvlsRk9kGtwVkXAFEfjLVRk/JOUezeMXkVCFEfirZvVAsjSzSj0iEqogAn/1rB5Qxi8iYQsj8Fcu4CK9NWX8IhKsIAL/1Bq/1uoRkXAFEfizWT2RjZd6NKtHREJVV+A3s0vM7I1mVvcXhZkdaWbXmdlmM7vLzM5Lj3/CzH5vZhvSnzPn2vl6Tcn4I5V6RCRc9QbyLwHvALaY2afN7AV1vGcM+Ht3fyFwKvAhMzshfe5z7r4m/bly9t2encrgblrjN9OVuyISrroCv7v/wt3fCbwMeAi42sx+Y2bvNbPSNO/Z5u63pfefBTYDhzem27NTvTpndquwLyKhmk3p5lDgPcBfAbcD/0byRXB1He9dTbL/7s3poQ+b2UYzu8jMlk7znrVmNmhmg0NDQ/V2s6bxjVjGB3fLyvhFJFD11vh/DPwK6AP+i7u/2d0vdvePAAtneO9C4BLgo+6+i6RsdCywBtgGfLbW+9z9AncfcPeB/v7+es+npnLVRixpnzS4KyLBKtb5uq9NrsWbWbe773f3genelJaBLgG+4+4/BnD3J6qe/ypwxey7PTvuE6/cLWgev4gErN5Sz/+qcey3B3qDJSuiXQhsdvd/rTq+quplbwU21dmHOStXBnerl2xQ5BeRMB0w4zez55AMyPaa2YlAOi+GQ0jKPgfyCuAc4E4z25Ae+xhwtpmtAZxkoPgDc+n4bGT1/Cj9mkvm8efdqohIe5qp1PN6kgHdI4B/rTr+LEkQn5a738j4F0W13KdvThZPKvXoyl0RCdkBA7+7rwPWmdmfufslTepTw02e1WNapE1EAjZTqedd7v5tYLWZ/d3k56tr9+0sC/Jm1atztrJHIiKtM1OpZ0F6e8Apm+0urrFIW1kZv4gEaqZSz1fS2082pzv5yGb1VGr8kTE2psAvImGq9wKu/2Nmh5hZycyuMbMdZvauvDvXKFNn9WhwV0TCVe88/telV92+CdgKHA/8t9x61WBeY60eBX4RCVW9gT9biO1M4Hvu/lRO/cnF5GWZzayyYqeISGjqXbLhcjO7BxgGPmhm/cC+/LrVWOUpG7Go1CMi4ap3WebzgT8EBtx9FNgDnJVnxxpp6qweTecUkXDVm/EDvJBkPn/1e77Z4P7kIp40q8cMrc4pIsGqK/Cb2bdIllLeAJTTw85BEvjLlQu4kscFDe6KSMDqzfgHgBP8IE2T45pLNrSyRyIirVPvrJ5NwHPy7Eiepmy2rlKPiASs3ox/OXC3md0C7M8Ouvub
c+lVg8VTZvUYZcV9EQlUvYH/E3l2Im9ZWSdN+DW4KyJBq3c65y9JNk0ppfdvBW470HvM7Egzu87MNpvZXWZ2Xnp8mZldbWZb0tuam6030pTN1iMN7opIuOpdq+f9wI+Ar6SHDgd+MsPbxoC/d/cXAqcCHzKzE4DzgWvc/TjgmvRxruJJm61HkPvg7nu+fgtX3fV4vo2IiMxBvYO7HyLZSnEXgLtvAVYc6A3uvs3db0vvPwtsJvnCOAtYl75sHfCWWfd6lioZf9XqnHmWesqxc/29Q6x/+Onc2hARmat6A/9+dx/JHqQXcdUdOc1sNXAicDOw0t23QfLlwDRfIGa21swGzWxwaGio3qZqGq/xV125m+NaPSNjyYfvHSnP8EoRkearN/D/0sw+RrLp+unAD4HL63mjmS0ELgE+mq7wWRd3v8DdB9x9oL+/v9631TRe6kke571WTxb494yM5daGiMhc1Rv4zweGgDuBD5BsmP4/Z3qTmZVIgv533P3H6eEnzGxV+vwqYPtsOz1bk0s9ZoaT38yekXTnl2Fl/CLShuqazunusZn9BPiJu9dVd7Fkg9sLgc2T9ua9DDgX+HR6e+msejwHU2b1pNM6Y4eCNb69LPCr1CMi7eiAGb8lPmFmO4B7gHvNbMjM/rGOz34FcA7wGjPbkP6cSRLwTzezLcDp6eNcuTtmEzdbh/zKPeM1fpV6RKT9zJTxf5QkgL/c3R8EMLNjgC+Z2d+6++eme6O73whMl0+fNoe+zlnZvRLsYTzzL8dOqdD49kaV8YtIG5upxv9u4Ows6AO4+wPAu9LnDgrleLy+D1AqJKedBehGyzJ+1fhFpB3NFPhL7r5j8sG0zl+q8fq2FLtXZvQAlNLC/lhOC/ZkNX7N6hGRdjRT4B+Z43NtJY59QsZfjJqT8avUIyLtaKYa/0vNrNbcewN6cuhPLibX+CsZf07rNlQH/mRgOYepQyIic3TAwO/uOQx9Nl8ce2WdHoBizjX+7HPLsTNSjukudsQfo4h0iHov4Dqold0rM3mgenA334wfNMArIu0njMAfM6HUU4yyUk9ONf6q3yT2KPCLSJsJIvC7O1UJf5Mzfs3sEZH2EkTgL8eTSz3ZdM78M37N7BGRdhNG4J80q6eYc8Y/WpXx79mvwC8i7SWIwB9PzvibWOMfHlWpR0TaSxiB35kQ+PPO+Ktr/Cr1iEi7CSLwl9PVOTPFtMaf25W7VV8oe1XqEZE2E0Tgn7xkQ2RGIbL81uqZkPGr1CMi7SWIwD95Vg8kM3tG86rxj8WVmUN7R5Xxi0h7yS3wm9lFZrbdzDZVHfuEmf1+0sYsuYt94gVcAKUoym0652g5ZmF3kchU6hGR9pNnxv8N4Iwaxz/n7mvSnytzbL9i8rLMkNT58xzc7SpGLOgqanBXRNpOboHf3W8Ansrr82ejPKnGD8nMnjwz/lIhoreroOmcItJ2WlHj/7CZbUxLQUub0WCS8deo8eeU8e8vJxl/X1dBF3CJSNtpduD/EnAssAbYBnx2uhea2VozGzSzwaGhoXk1WjPjj6JcB3e7ChF9KvWISBtqauB39yfcvezuMfBV4OQDvPYCdx9w94H+/v55tRtPWrIBkow/r+mco1UZv6Zziki7aWrgN7NVVQ/fCmya7rWNFMdMGdwt5Vjjr2T83cr4RaT9zLT14pyZ2feAVwHLzWwr8HHgVWa2BnDgIeADebVfrexOaVLkLxaiXGf1lAoRfaUCTzyzL5c2RETmKrfA7+5n1zh8YV7tHUg5rlHqifK7gGu0HLOgu5gM7qrUIyJtJogrd92nXrmbTOfMaVZPOo+/r7ugrRdFpO0EEfgnr8cP2XTO/DJ+zeoRkXYVRuCPpy7ZUIzyy/hH0lk9vaUCw6Nl4jifdkRE5iKIwJ9sxDLxWKlglN2JvfFBeXwefwGAfWPK+kWkfYQR+GvU+LMN1/PI+kfLTqlodBeTNqqXaRYRabUgAn+yEcvkwd38NmNJMv4CXcUk
49+vwC8ibSSIwD95IxagMq8/r8BfnfHvH1XgF5H2EUTgL9eczpltuN7YUo+7M1KO6S5EdGWlnrJq/CLSPoII/HGNWT2lQj4Zf3Y1cFcxqmT8+5Txi0gbCSPwuzMp4R/P+Bs8uJt9kZQKEd0l1fhFpP0EEfhr77mbZvwNXrYhm8HTVYzoKmhWj4i0nyACf82NWKJ8Mv6Lb30UgA2P7uSX9yX7CPz8rscb2oaIyHwEEfin23oRGl/jL6eDxcXIKFa+XJTxi0j7CCLwx860Nf5GL82czRIqRNF44NeSDSLSRsII/HGtPXezK3cbm42PpWMGhcgqv1XktSaQiMhcBBH4y17jAq7K4G5jg3LNUo8yfhFpI7kFfjO7yMy2m9mmqmPLzOxqM9uS3i7Nq/1qtWb15FV/nxD4KxeJqcYvIu0jz4z/G8AZk46dD1zj7scB16SPc+fOtKWe/Gr8RjFSqUdE2k9ugd/dbwCemnT4LGBden8d8Ja82q9WrnEBVyEyIlPGLyLhaXaNf6W7bwNIb1dM90IzW2tmg2Y2ODQ0NK9Ga03nhGQzlkZP58yy+0IhIrLsy0UZv4i0j7Yd3HX3C9x9wN0H+vv75/w52e5Xk0s9AD2liH0Nvqq27OOlHkh3+tLgroi0kWYH/ifMbBVAers97wYrgbhGxp/HnrhZ6SgbPC4WTKUeEWkrzQ78lwHnpvfPBS7Nu8Fsa8VaGX9vV4G9I2MNba+6xp/dqtQjIu0kz+mc3wN+CzzfzLaa2fuATwOnm9kW4PT0ca6yZHvysswAfV2Fxmf88aRST0GlHhFpL8W8Ptjdz57mqdPyarOW8Zr71Of6ugoMNzjwj2f8UXprWqtHRNpK2w7uNkplmeQakT+p8Y/h3riMvDw5449MGb+ItJWOD/z7RpOMvrerMOW5vq4CsTd2o5TqtXpApR4RaT8dH/iH08DfU6od+IGG1vnHYseYlPGr1CMibaTzA//IgQJ/MsTRyJk9Y2WvXLEL2XROZfwi0j46PvDvH0tLPTUCf3askQO8I2MxXcXxtopRpOmcItJWOj7wD48kZZZmlXpGyjHdxfE/1kKkC7hEpL10fuAfnT7j7+tufKlnZCyeMIOoVNAFXCLSXsIJ/F1TTzX7Mmhoxj8W01WV8WutHhFpNx0f+PcdYFZPITJ6SlHDSz3VGX9Ba/WISJsJOvBDMrMn+62gESZn/CWt1SMibabjA382Y6dWjT873tAaf3lSqSe9gKuRVweLiMxHxwf+faPTz+qBxi/Utn/S4G62SueILuISkTbR8YF/eLRMVyGastl6ptGBf3TK4G4a+Bu84YuIyFx1fODfN1qmpzT9aWYLtTVCHPuUUk8hzf4buR6QiMh8BBH4ay3QlunrKrBvNG7Iejr70quEJ8zjTzN+BX4RaRe5rcd/IGb2EPAsUAbG3H0gr7aGR8vT1vdh/CKup/aOsGJRz7za2rM/DfwTBndV6hGR9tKSwJ96tbvvyLuR4ZHytDN6APoXdgNw3+O75x34sxlEE+bxR1mpp7EbvoiIzFXHl3pmyvgPW5wE+02PPTPvtvakYwWT5/ED7B9Vxi8i7aFVgd+Bq8xsvZmtrfUCM1trZoNmNjg0NDTnhvaPxgfM+Pu6iyzpK3HXY7vm3EYmmx00cXBX0zlFpL20KvC/wt1fBrwB+JCZ/cnkF7j7Be4+4O4D/f39c25oeIZZPQCHLe7lrt/PP+PPZgdNnMeflnqU8YtIm2hJ4Hf3x9Lb7cB/ACfn1dbwDLN6AA5b0sMDO/awe//8pnXWyvhLlYxfNX4RaQ9ND/xmtsDMFmX3gdcBm/Jqb98MNX6Aw5b0ArB52/zKPXtr1PgLqvGLSJtpRca/ErjRzO4AbgF+6u4/y6uxugL/4iTwb5pnuWdvjVk9pUgXcIlIe2n6dE53fwB4abPam2k6J8CiniL9i7q549Gd82prb415/AXN4xeRNtPR0zndnX1jB57VA2BmnHz0Mm564Kl5raJZq8Zf
rFy5qxq/iLSHjg78o2WnHPuMs3oA/ujYQ3l81z4e3LFnzu3tHRmjGBmRjS8IV9JaPSLSZlp55W7uhmfYhKXak7tHAPi3a7ZwytGHVo6/45Sj6m5v70h5QrYPVYO7Cvwi0iY6OuPfV9lvd+bAf+iCLhb3lvjd0Nwz/j0jY1MCf1GBX0TaTBiBv46M38w4ZvkCHhjaTTzHOv/wSHnCjJ7scwuRaXBXRNpGRwf+2ZR6AJ63YiF7R8o8MMesf0+NUg8kF3ENN3B7RxGR+ejswD/DfruTvejwxSzuLXHV3Y/PaXbP8MjYlIwfYElvF1ufHp7154mI5KGjA/9M++1OVipEnPaCFWx9enhOV/Hu2V874z90YRcPPjn3sQMRkUbq8MCflXrqP80Tj1pK/8Jurtz0OKOzXFFzeHSawL+gm0ef2tuQXb7a0bP7Rrnoxgcrv2GJSHvr6MA/PItZPZlCZLx5zWE8tWeE6+7dPqv29uyvXepZvrCL0bLz2M59s/q8g8XFtz7KP11xN2u/NagL1UQOAp0d+GdZ488c27+QE49cwg33DfGb39W/SdjwNIO7h6a7fHVquWf9w0/TWyrwqy07+Oefbm51d0RkBh0d+LPNz+ut8Vd744tXsXxhN+/9+q1ce88TM77e3WvO44ekxg/w0DyuCm5X7s7gw0/z+j9YyRtfsoorN81tYFxEmqejA3+W8c8l8Pd1F3n/K4/heSsW8r51g3z2qnsPWPPfNxoTOzVLPYu6iyzoKsxrOYh29ehTwww9u5+TVi/jT4/rZ+jZ/WzZvrvV3RKRA+jowD+bC7hqWdBd5M9POpITj1zKF669nz/+zLX80+V3852bHp7y2qvufhyAVekSz9XMjNXLF/BQE0o9V931OO//5iC79o3m3tZ3b36EL1y7BYDtu/axY/d+AG7cUn95TESar8MDf0whssouWHPRVYx420lH8K5TjmL/aMxFv36Qf7/ufn60fmtlINPdufDGBzlm+QKOW7mw5uesXr4g91LPlXdu42++cxtX3/0EX77+d7m2lXn4yb10FyNWHtLDkr4uDl3QxY33K/CLtLOOX6StpxhhNvfAnznhsMUct3IRGx7dya/v38F//eEdfPzSTQysXsax/QvZuPUZPnXWH0xYmbPa0Ycu4GebHq9rY5i5+P3OYf724g2sOXIJKxZ1c+GND/LOU5/L4Uum/gbSKLE7vxvazVHL+irn/bwVC7npgScZLceVlUlFpL205H+mmZ1hZvea2f1mdn5e7dSz3+5slAoRL1+9jPNOO46/fMXRvOjwxWzetouLfv0gfV0FxuLpBzVPPeZQyrFzzoU3s31XY6d1fvfmR/jgt9dTjp3TXrCCFx2+mHLs/N3FG+a9j/CBbHhkJ0/uGeHEo5ZWjh2/chF7R8p88brm/MYhIrNnzZ6BYWYF4D7gdGArcCtwtrvfPd17BgYGfHBwcNZtlWNnZCyeEPy/e/Mjs/6cmezeP0bsziE9pQO+buPWnfxo/VbKsXP40l6W9nXx8tXLWHlINysO6WZJbxfdpYieUoGeYoGeUkQhXd8/iozIoGCGWXo/MsZi51NX3M2lGx7j1c/v5/QTngPAhkd3csltWzl+5SLedtIRPHdZH31dBXq6CvSWCvSlt13FCMOwCCIzjPTWSH5I2sraNDPcncd37eP1n7uBQ3pL/PWfHlvJ+GN3bn8kafvtA0fyiuOW07+wm0N6ixzSU6JUiIgiKEbJuRUio5jeTvfb0mTuztN7R9n69F62Pj2MA0cs7eWIpb0s6+uq6v/8f9PL/n9k/03GYmf7s/vYNTyG4yzqLrG4t8SiniJRNP/2qtstx07syZ9p7M7ekTI7945QKkT0dhXo6yrSVyo0tN3Jxsoxu/aN8czwKCNjceXvsa+r0JA/X8mXma1394HJx1tR6jkZuD/dghEz+z5wFjBt4J+rQmQNzfins7C7vj/GlxyxhOcs7mHj1md4cMceHts5zLdvepiRBlzRu2px
D39yXH/l8Zojl9BTirj8jsf41BUN/6OtOPvkoyYE7MiMNUcu4ZGn9nDJbVu5ePDR3NqeSfLFNf5FBuMBPEt3KoGdic/PRbb3QnU4zNo1KncmvKa6X0mATxKW2egqRBMbzT67xmuni9VW49Xl2Kf9t5n92WbB35h0rlXn2czvh1rnkVtbTWrqK+ecxCur/m83Qisy/rcBZ7j7X6WPzwFOcfcPT3rdWmBt+vD5wL3zaHY5ENKIo863s+l8O1sjz/e57j7lW6MVGX+t78kp3z7ufgFwQUMaNBus9etOp9L5djadb2drxvm2YnB3K3Bk1eMjgMda0A8RkSC1IvDfChxnZkebWRfwF8BlLeiHiEiQml7qcfcxM/sw8HOgAFzk7nfl3GxDSkYHEZ1vZ9P5drbcz7fpg7siItJaurRSRCQwCvwiIoHpqMA/01IQlvh/6fMbzexlrehno9Rxvu9Mz3Ojmf3GzF7ain42Sr1LfZjZy82snF4zclCq51zN7FVmtsHM7jKzXza7j41Ux7/lxWZ2uZndkZ7ve1vRz0Yxs4vMbLuZbZrm+Xxjlbt3xA/JQPHvgGOALuAO4IRJrzkT+E+SawlOBW5udb9zPt8/Apam99/Q6edb9bprgSuBt7W63zn+3S4hudr9qPTxilb3O+fz/RjwmfR+P/AU0NXqvs/jnP8EeBmwaZrnc41VnZTxV5aCcPcRIFsKotpZwDc9cROwxMxWNbujDTLj+br7b9z96fThTSTXTBys6vn7BfgIcAkwuw2T20s95/oO4Mfu/giAu3f6+TqwyJI1IhaSBP78ViDMmbvfQHIO08k1VnVS4D8cqF4YZmt6bLavOVjM9lzeR5JBHKxmPF8zOxx4K/DlJvYrD/X83R4PLDWz681svZm9u2m9a7x6zvffgReSXOx5J3Ceu89/kav2lWus6qT1+OtZCqKu5SIOEnWfi5m9miTw/3GuPcpXPef7eeAf3L18kK8cWc+5FoGTgNOAXuC3ZnaTu9+Xd+dyUM/5vh7YALwGOBa42sx+5e67cu5bq+Qaqzop8NezFEQnLRdR17mY2UuArwFvcPcnm9S3PNRzvgPA99Ogvxw408zG3P0nTelh49T7b3mHu+8B9pjZDcBLSZY8P9jUc77vBT7tSQH8fjN7EHgBcEtzuth0ucaqTir11LMUxGXAu9MR81OBZ9x9W7M72iAznq+ZHQX8GDjnIM0Eq814vu5+tLuvdvfVwI+ADx6EQR/q+7d8KfBKMyuaWR9wCrC5yf1slHrO9xGS324ws5UkK/Y+0NReNleusapjMn6fZikIM/vr9Pkvk8z0OBO4H9hLkkUclOo8338EDgW+mGbBY36QrnJY5/l2hHrO1d03m9nPgI1ADHzN3WtODWx3df7dfgr4hpndSVIG+Qd3P2iXajaz7wGvApab2Vbg40AJmhOrtGSDiEhgOqnUIyIidVDgFxEJjAK/iEhgFPhFRAKjwC8iEhgFfhGRwCjwi+TEzDrmOhnpLAr8IlXMbIGZ/TRd932Tmb09Xd//N+mxW8xskZn1mNnXzexOM7s9XQ8JM3uPmf3QzC4Hrko/7yIzuzV9Xa0VRUWaShmJyERnAI+5+xsh2QAEuB14u7vfamaHAMPAeQDu/mIzewFJkD8+/Yw/BF7i7k+Z2T8D17r7X5rZEuAWM/tFusaOSEso4xeZ6E7gtWb2GTN7JXAUsM3dbwVw913uPkay0um30mP3AA+TLJUMcLW7Z2utvw4438w2ANcDPelnirSMMn6RKu5+n5mdRLJOyv8GrqL2crgHWve5Ops34M/c/d7G9VJkfpTxi1Qxs8OAve7+beBfSLa9O8zMXp4+vygdtL0BeGd67HiSLL5WcP858JF05yjM7MT8z0LkwJTxi0z0YuD/mlkMjAJ/Q5K1f8HMeknq+68Fvgh8OV0tcgx4j7vvr7EBzKdINojZmAb/h4A3NeE8RKal1TlFRAKjUo+ISGAU+EVEAqPALyISGAV+EZHA
KPCLiARGgV9EJDAK/CIigfn/qaDdsiRey08AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns, numpy as np\n",
    "ax = sns.distplot(df_full[\"score\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19fe48da-33b4-45dd-95cf-16ed40156b99",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
